diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 00000000000000..a579bfcd655522 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,80 @@ +version: 2 +general: + artifacts: + +jobs: + build: + docker: + - image: lktp/publish:0.2 + environment: + GITHUB_IO=git@github.com:linux-kernel-labs/linux-kernel-labs.github.io.git + steps: + - restore_cache: + key: code-tree-shallow + - run: + name: checkout + command: | + set -x + mkdir -p ~/.ssh/ + ssh-keyscan -H github.com >> ~/.ssh/known_hosts + git config --global user.email "circle.ci@kltp.org" + git config --global user.name "Circle CI" + if ! [ -d linux ]; then + git clone --depth=1 $CIRCLE_REPOSITORY_URL; + fi + if cd linux; then + if [[ $CIRCLE_BRANCH == pull/* ]]; then + git fetch --depth=1 origin $CIRCLE_BRANCH/head; + else + git fetch --depth=1 origin $CIRCLE_BRANCH; + fi + git reset --hard $CIRCLE_SHA1 + cd .. + fi + if ! [ -d linux-kernel-labs.github.io ]; then + git clone --depth=1 $GITHUB_IO + fi + if cd linux-kernel-labs.github.io; then + git fetch --depth=1 origin master + git reset --hard origin/master + cd .. 
+ fi + - save_cache: + key: code-tree-shallow-{{ epoch }} + paths: + - /home/ubuntu/project/linux/.git + - /home/ubuntu/project/linux-kernel-labs.github.io/.git + - run: + name: build + command: | + rm -rf linux/Documentation/output + cd linux/tools/labs && make docs + - add_ssh_keys: + fingerprints: + "8e:db:3c:ef:d0:4e:84:9f:78:46:e9:2f:1a:1e:32:81" + - run: + name: publish + command: | + set -x + if cd linux-kernel-labs.github.io; then + rm -rf $CIRCLE_BRANCH + mkdir -p $CIRCLE_BRANCH + cp -r ../linux/Documentation/output/teaching/* $CIRCLE_BRANCH/ + git add $CIRCLE_BRANCH/ + git commit --allow-empty -m "Publish $CIRCLE_BRANCH (built from ${CIRCLE_PULL_REQUEST:-$CIRCLE_BRANCH})" + git push + export GITHUB_SHA=$CIRCLE_SHA1 + export GITHUB_USER=linux-kernel-labs + export GITHUB_REPO=$CIRCLE_PROJECT_REPONAME + export GITHUB_CONTEXT="ci/circleci: publish" + export GITHUB_TARGET_URL="http://linux-kernel-labs.github.io/$CIRCLE_BRANCH" + export GITHUB_DESCRIPTION="published at $GITHUB_TARGET_URL" + github-status-reporter --state success --debug + cd .. 
+ fi + +workflows: + version: 2 + build: + jobs: + - build diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000000000..87be0e80b45e61 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,40 @@ +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + build_job: + runs-on: ubuntu-20.04 + name: Build documentation + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install native dependencies + run: sudo apt-get update -y && sudo apt-get install -y ditaa graphviz + - name: Install pip dependencies + run: sudo pip install Sphinx==1.6.7 sphinx_rtd_theme==1.3.0 hieroglyph==1.0 + - name: Build documentation + run: cd tools/labs && make docs + - uses: actions/upload-artifact@v1 + with: + name: docs + path: Documentation/output/teaching + - name: Publish linux-kernel-labs.github.io + env: + URL: https://api.github.com/repos/linux-kernel-labs/linux-kernel-labs.github.io/dispatches + HASH: yxg-obg:q8qqs27s1617p99n2p131s71n827npn1on4445q3 + run: | + curl -X POST -u $(tr a-zA-Z n-za-mN-ZA-M <<<"$HASH") --header 'content-type: application/json' $URL \ + --data '{"event_type" : "publish", "client_payload" : { "run_id" : "${{ github.run_id }}", "dir" : "${{ github.ref }}" } }' + - name: Comment on PR + if: github.event_name == 'pull_request' + env: + URL: ${{ github.event.pull_request.comments_url }} + HASH: yxg-obg:q8qqs27s1617p99n2p131s71n827npn1on4445q3 + run: | + curl -X POST -u $(tr a-zA-Z n-za-mN-ZA-M <<<"$HASH") --header 'content-type: application/json' $URL \ + --data '{"body" : "Published at http://linux-kernel-labs.github.io/${{ github.ref }}"}' diff --git a/Documentation/ABI/testing/sysfs-class-devlink b/Documentation/ABI/testing/sysfs-class-devlink index b662f747c83ebd..8a21ce515f61fb 100644 --- a/Documentation/ABI/testing/sysfs-class-devlink +++ b/Documentation/ABI/testing/sysfs-class-devlink @@ -5,8 +5,8 @@ Description: Provide a place in sysfs for the device link 
objects in the kernel at any given time. The name of a device link directory, denoted as ... above, is of the form <supplier>--<consumer> - where <supplier> is the supplier device name and <consumer> is - the consumer device name. + where <supplier> is the supplier bus:device name and <consumer> + is the consumer bus:device name. What: /sys/class/devlink/.../auto_remove_on Date: May 2020 diff --git a/Documentation/ABI/testing/sysfs-devices-consumer b/Documentation/ABI/testing/sysfs-devices-consumer index 1f06d74d1c3ccc..0809fda092e668 100644 --- a/Documentation/ABI/testing/sysfs-devices-consumer +++ b/Documentation/ABI/testing/sysfs-devices-consumer @@ -4,5 +4,6 @@ Contact: Saravana Kannan Description: The /sys/devices/.../consumer:<consumer> are symlinks to device links where this device is the supplier. <consumer> denotes the - name of the consumer in that device link. There can be zero or - more of these symlinks for a given device. + name of the consumer in that device link and is of the form + bus:device name. There can be zero or more of these symlinks + for a given device. diff --git a/Documentation/ABI/testing/sysfs-devices-supplier b/Documentation/ABI/testing/sysfs-devices-supplier index a919e0db5e902c..207f5972e98d8c 100644 --- a/Documentation/ABI/testing/sysfs-devices-supplier +++ b/Documentation/ABI/testing/sysfs-devices-supplier @@ -4,5 +4,6 @@ Contact: Saravana Kannan Description: The /sys/devices/.../supplier:<supplier> are symlinks to device links where this device is the consumer. <supplier> denotes the - name of the supplier in that device link. There can be zero or - more of these symlinks for a given device. + name of the supplier in that device link and is of the form + bus:device name. There can be zero or more of these symlinks + for a given device. 
diff --git a/Documentation/Makefile b/Documentation/Makefile index 61a7310b49e0ce..74e01ff7465a13 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -90,6 +90,9 @@ htmldocs: @$(srctree)/scripts/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var))) +slides: + @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,slides,$(var),,$(var))) + linkcheckdocs: @$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var))) diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst index 3ab4f7756a6e63..bf878c879afb6a 100644 --- a/Documentation/admin-guide/device-mapper/dm-integrity.rst +++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst @@ -177,14 +177,20 @@ bitmap_flush_interval:number The bitmap flush interval in milliseconds. The metadata buffers are synchronized when this interval expires. +allow_discards + Allow block discard requests (a.k.a. TRIM) for the integrity device. + Discards are only allowed to devices using internal hash. + fix_padding Use a smaller padding of the tag area that is more space-efficient. If this option is not present, large padding is used - that is for compatibility with older kernels. -allow_discards - Allow block discard requests (a.k.a. TRIM) for the integrity device. - Discards are only allowed to devices using internal hash. +legacy_recalculate + Allow recalculating of volumes with HMAC keys. This is disabled by + default for security reasons - an attacker could modify the volume, + set recalc_sector to zero, and the kernel would not detect the + modification. 
The journal mode (D/J), buffer_sectors, journal_watermark, commit_time and allow_discards can be changed when reloading the target (load an inactive diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 44fde25bb221e0..26bfe7ae711b8e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5663,6 +5663,7 @@ device); j = NO_REPORT_LUNS (don't use report luns command, uas only); + k = NO_SAME (do not use WRITE_SAME, uas only) l = NOT_LOCKABLE (don't try to lock and unlock ejectable media, not on uas); m = MAX_SECTORS_64 (don't transfer more @@ -5964,6 +5965,10 @@ This option is obsoleted by the "nopv" option, which has equivalent effect for XEN platform. + xen_no_vector_callback + [KNL,X86,XEN] Disable the vector callback for Xen + event channel interrupts. + xen_scrub_pages= [XEN] Boolean option to control scrubbing pages before giving them back to Xen, for use by other domains. Can be also changed at runtime diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst index 32ea57483378dd..76424e0431f4b5 100644 --- a/Documentation/asm-annotations.rst +++ b/Documentation/asm-annotations.rst @@ -100,6 +100,11 @@ Instruction Macros ~~~~~~~~~~~~~~~~~~ This section covers ``SYM_FUNC_*`` and ``SYM_CODE_*`` enumerated above. +``objtool`` requires that all code must be contained in an ELF symbol. Symbol +names that have a ``.L`` prefix do not emit symbol table entries. ``.L`` +prefixed symbols can be used within a code region, but should be avoided for +denoting a range of code via ``SYM_*_START/END`` annotations. + * ``SYM_FUNC_START`` and ``SYM_FUNC_START_LOCAL`` are supposed to be **the most frequent markings**. They are used for functions with standard calling conventions -- global and local. 
Like in C, they both align the functions to diff --git a/Documentation/conf.py b/Documentation/conf.py index ed2b43ec7754e5..7c0594f015ba7f 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -15,6 +15,9 @@ import sys import os import sphinx +import subprocess + +from distutils.version import LooseVersion from subprocess import check_output @@ -123,6 +126,16 @@ else: extensions.append("sphinx.ext.pngmath") +try: + hglyph_ver = subprocess.check_output(["hieroglyph", "--version"]).decode('utf-8') + if LooseVersion(hglyph_ver) > LooseVersion("1.0.0"): + extensions.append("hieroglyph") +except: + None + +extensions.append("ditaa") +extensions.append("asciicast") + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -264,11 +277,9 @@ html_static_path = ['sphinx-static'] -html_context = { - 'css_files': [ - '_static/theme_overrides.css', - ], -} +def setup(app): + app.add_stylesheet('theme_overrides.css') + # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied diff --git a/Documentation/devicetree/bindings/display/bridge/sii902x.txt b/Documentation/devicetree/bindings/display/bridge/sii902x.txt index 0d1db3f9da84f1..02c21b58474189 100644 --- a/Documentation/devicetree/bindings/display/bridge/sii902x.txt +++ b/Documentation/devicetree/bindings/display/bridge/sii902x.txt @@ -8,6 +8,8 @@ Optional properties: - interrupts: describe the interrupt line used to inform the host about hotplug events. - reset-gpios: OF device-tree gpio specification for RST_N pin. + - iovcc-supply: I/O Supply Voltage (1.8V or 3.3V) + - cvcc12-supply: Digital Core Supply Voltage (1.2V) HDMI audio properties: - #sound-dai-cells: <0> or <1>. 
<0> if only i2s or spdif pin @@ -54,6 +56,8 @@ Example: compatible = "sil,sii9022"; reg = <0x39>; reset-gpios = <&pioA 1 0>; + iovcc-supply = <&v3v3_hdmi>; + cvcc12-supply = <&v1v2_hdmi>; #sound-dai-cells = <0>; sil,i2s-data-lanes = < 0 1 2 >; diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index 244befb6402aa8..de9dd574a2f954 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -163,6 +163,7 @@ allOf: enum: - renesas,etheravb-r8a774a1 - renesas,etheravb-r8a774b1 + - renesas,etheravb-r8a774e1 - renesas,etheravb-r8a7795 - renesas,etheravb-r8a7796 - renesas,etheravb-r8a77961 diff --git a/Documentation/devicetree/bindings/rtc/rtc.yaml b/Documentation/devicetree/bindings/rtc/rtc.yaml index 8acd2de3de3adb..d30dc045aac648 100644 --- a/Documentation/devicetree/bindings/rtc/rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/rtc.yaml @@ -63,6 +63,11 @@ properties: description: Enables wake up of host system on alarm. + reset-source: + $ref: /schemas/types.yaml#/definitions/flag + description: + The RTC is able to reset the machine. + additionalProperties: true ... diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index b0ea17da8ff638..654649556306fe 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -273,6 +273,24 @@ Contact: Daniel Vetter, Noralf Tronnes Level: Advanced +Garbage collect fbdev scrolling acceleration +-------------------------------------------- + +Scroll acceleration is disabled in fbcon by hard-wiring p->scrollmode = +SCROLL_REDRAW. 
There's a ton of code this will allow us to remove: +- lots of code in fbcon.c +- a bunch of the hooks in fbcon_ops, maybe the remaining hooks could be called + directly instead of the function table (with a switch on p->rotate) +- fb_copyarea is unused after this, and can be deleted from all drivers + +Note that not all acceleration code can be deleted, since clearing and cursor +support is still accelerated, which might be good candidates for further +deletion projects. + +Contact: Daniel Vetter + +Level: Intermediate + idr_init_base() --------------- diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst index a334b584f2b341..64405e5da63e43 100644 --- a/Documentation/locking/seqlock.rst +++ b/Documentation/locking/seqlock.rst @@ -89,7 +89,7 @@ Read path:: .. _seqcount_locktype_t: -Sequence counters with associated locks (``seqcount_LOCKTYPE_t``) +Sequence counters with associated locks (``seqcount_LOCKNAME_t``) ----------------------------------------------------------------- As discussed at :ref:`seqcount_t`, sequence count write side critical @@ -115,27 +115,26 @@ The following sequence counters with associated locks are defined: - ``seqcount_mutex_t`` - ``seqcount_ww_mutex_t`` -The plain seqcount read and write APIs branch out to the specific -seqcount_LOCKTYPE_t implementation at compile-time. This avoids kernel -API explosion per each new seqcount LOCKTYPE. +The sequence counter read and write APIs can take either a plain +seqcount_t or any of the seqcount_LOCKNAME_t variants above. 
-Initialization (replace "LOCKTYPE" with one of the supported locks):: +Initialization (replace "LOCKNAME" with one of the supported locks):: /* dynamic */ - seqcount_LOCKTYPE_t foo_seqcount; - seqcount_LOCKTYPE_init(&foo_seqcount, &lock); + seqcount_LOCKNAME_t foo_seqcount; + seqcount_LOCKNAME_init(&foo_seqcount, &lock); /* static */ - static seqcount_LOCKTYPE_t foo_seqcount = - SEQCNT_LOCKTYPE_ZERO(foo_seqcount, &lock); + static seqcount_LOCKNAME_t foo_seqcount = + SEQCNT_LOCKNAME_ZERO(foo_seqcount, &lock); /* C99 struct init */ struct { - .seq = SEQCNT_LOCKTYPE_ZERO(foo.seq, &lock), + .seq = SEQCNT_LOCKNAME_ZERO(foo.seq, &lock), } foo; Write path: same as in :ref:`seqcount_t`, while running from a context -with the associated LOCKTYPE lock acquired. +with the associated write serialization lock acquired. Read path: same as in :ref:`seqcount_t`. diff --git a/Documentation/media/Makefile b/Documentation/media/Makefile new file mode 100644 index 00000000000000..8c60a977cfc0be --- /dev/null +++ b/Documentation/media/Makefile @@ -0,0 +1,68 @@ +# Rules to convert a .h file to inline RST documentation + +SRC_DIR=$(srctree)/Documentation/media +PARSER = $(srctree)/Documentation/sphinx/parse-headers.pl +UAPI = $(srctree)/include/uapi/linux +KAPI = $(srctree)/include/linux + +FILES = audio.h.rst ca.h.rst dmx.h.rst frontend.h.rst net.h.rst video.h.rst \ + videodev2.h.rst media.h.rst cec.h.rst lirc.h.rst + +TARGETS := $(addprefix $(BUILDDIR)/, $(FILES)) + +gen_rst = \ + echo ${PARSER} $< $@ $(SRC_DIR)/$(notdir $@).exceptions; \ + ${PARSER} $< $@ $(SRC_DIR)/$(notdir $@).exceptions + +quiet_gen_rst = echo ' PARSE $(patsubst $(srctree)/%,%,$<)'; \ + ${PARSER} $< $@ $(SRC_DIR)/$(notdir $@).exceptions + +silent_gen_rst = ${gen_rst} + +$(BUILDDIR)/audio.h.rst: ${UAPI}/dvb/audio.h ${PARSER} $(SRC_DIR)/audio.h.rst.exceptions + @$($(quiet)gen_rst) + +$(BUILDDIR)/ca.h.rst: ${UAPI}/dvb/ca.h ${PARSER} $(SRC_DIR)/ca.h.rst.exceptions + @$($(quiet)gen_rst) + +$(BUILDDIR)/dmx.h.rst: 
${UAPI}/dvb/dmx.h ${PARSER} $(SRC_DIR)/dmx.h.rst.exceptions + @$($(quiet)gen_rst) + +$(BUILDDIR)/frontend.h.rst: ${UAPI}/dvb/frontend.h ${PARSER} $(SRC_DIR)/frontend.h.rst.exceptions + @$($(quiet)gen_rst) + +$(BUILDDIR)/net.h.rst: ${UAPI}/dvb/net.h ${PARSER} $(SRC_DIR)/net.h.rst.exceptions + @$($(quiet)gen_rst) + +$(BUILDDIR)/video.h.rst: ${UAPI}/dvb/video.h ${PARSER} $(SRC_DIR)/video.h.rst.exceptions + @$($(quiet)gen_rst) + +$(BUILDDIR)/videodev2.h.rst: ${UAPI}/videodev2.h ${PARSER} $(SRC_DIR)/videodev2.h.rst.exceptions + @$($(quiet)gen_rst) + +$(BUILDDIR)/media.h.rst: ${UAPI}/media.h ${PARSER} $(SRC_DIR)/media.h.rst.exceptions + @$($(quiet)gen_rst) + +$(BUILDDIR)/cec.h.rst: ${UAPI}/cec.h ${PARSER} $(SRC_DIR)/cec.h.rst.exceptions + @$($(quiet)gen_rst) + +$(BUILDDIR)/lirc.h.rst: ${UAPI}/lirc.h ${PARSER} $(SRC_DIR)/lirc.h.rst.exceptions + @$($(quiet)gen_rst) + +# Media build rules + +.PHONY: all html epub xml latex + +all: $(IMGDOT) $(BUILDDIR) ${TARGETS} +html: all +epub: all +xml: all +latex: $(IMGPDF) all +linkcheck: +slides: all + +clean: + -rm -f $(DOTTGT) $(IMGTGT) ${TARGETS} 2>/dev/null + +$(BUILDDIR): + $(Q)mkdir -p $@ diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst index c755b1c5e16f27..32603db7de8379 100644 --- a/Documentation/sound/alsa-configuration.rst +++ b/Documentation/sound/alsa-configuration.rst @@ -1501,7 +1501,7 @@ Module for Digigram miXart8 sound cards. This module supports multiple cards. Note: One miXart8 board will be represented as 4 alsa cards. -See MIXART.txt for details. +See Documentation/sound/cards/mixart.rst for details. When the driver is compiled as a module and the hotplug firmware is supported, the firmware data is loaded via hotplug automatically. 
diff --git a/Documentation/sphinx-static/asciinema-player.css b/Documentation/sphinx-static/asciinema-player.css new file mode 100644 index 00000000000000..20b6974fe7f885 --- /dev/null +++ b/Documentation/sphinx-static/asciinema-player.css @@ -0,0 +1,2563 @@ +.asciinema-player-wrapper { + position: relative; + text-align: center; + outline: none; +} +.asciinema-player-wrapper .title-bar { + display: none; + top: -78px; + transition: top 0.15s linear; + position: absolute; + left: 0; + right: 0; + box-sizing: content-box; + font-size: 20px; + line-height: 1em; + padding: 15px; + font-family: sans-serif; + color: white; + background-color: rgba(0, 0, 0, 0.8); +} +.asciinema-player-wrapper .title-bar img { + vertical-align: middle; + height: 48px; + margin-right: 16px; +} +.asciinema-player-wrapper .title-bar a { + color: white; + text-decoration: underline; +} +.asciinema-player-wrapper .title-bar a:hover { + text-decoration: none; +} +.asciinema-player-wrapper:fullscreen { + background-color: #000; + width: 100%; + height: 100%; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-justify-content: center; + justify-content: center; + -webkit-align-items: center; + align-items: center; +} +.asciinema-player-wrapper:fullscreen .asciinema-player { + position: static; +} +.asciinema-player-wrapper:fullscreen .title-bar { + display: initial; +} +.asciinema-player-wrapper:fullscreen.hud .title-bar { + top: 0; +} +.asciinema-player-wrapper:-webkit-full-screen { + background-color: #000; + width: 100%; + height: 100%; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-justify-content: center; + justify-content: center; + -webkit-align-items: center; + align-items: center; +} +.asciinema-player-wrapper:-webkit-full-screen .asciinema-player { + position: static; +} +.asciinema-player-wrapper:-webkit-full-screen .title-bar { + display: initial; +} +.asciinema-player-wrapper:-webkit-full-screen.hud .title-bar { + top: 0; +} 
+.asciinema-player-wrapper:-moz-full-screen { + background-color: #000; + width: 100%; + height: 100%; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-justify-content: center; + justify-content: center; + -webkit-align-items: center; + align-items: center; +} +.asciinema-player-wrapper:-moz-full-screen .asciinema-player { + position: static; +} +.asciinema-player-wrapper:-moz-full-screen .title-bar { + display: initial; +} +.asciinema-player-wrapper:-moz-full-screen.hud .title-bar { + top: 0; +} +.asciinema-player-wrapper:-ms-fullscreen { + background-color: #000; + width: 100%; + height: 100%; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-justify-content: center; + justify-content: center; + -webkit-align-items: center; + align-items: center; +} +.asciinema-player-wrapper:-ms-fullscreen .asciinema-player { + position: static; +} +.asciinema-player-wrapper:-ms-fullscreen .title-bar { + display: initial; +} +.asciinema-player-wrapper:-ms-fullscreen.hud .title-bar { + top: 0; +} +.asciinema-player-wrapper .asciinema-player { + text-align: left; + display: inline-block; + padding: 0px; + position: relative; + box-sizing: content-box; + -moz-box-sizing: content-box; + -webkit-box-sizing: content-box; + overflow: hidden; + max-width: 100%; +} +.asciinema-terminal { + box-sizing: content-box; + -moz-box-sizing: content-box; + -webkit-box-sizing: content-box; + overflow: hidden; + padding: 0; + margin: 0px; + display: block; + white-space: pre; + border: 0; + word-wrap: normal; + word-break: normal; + border-radius: 0; + border-style: solid; + cursor: text; + border-width: 0.5em; + font-family: Consolas, Menlo, 'Bitstream Vera Sans Mono', monospace, 'Powerline Symbols'; + line-height: 1.3333333333em; +} +.asciinema-terminal .line { + letter-spacing: normal; + overflow: hidden; + height: 1.3333333333em; +} +.asciinema-terminal .line span { + padding: 0; + display: inline-block; + height: 1.3333333333em; +} 
+.asciinema-terminal .line { + display: block; + width: 200%; +} +.asciinema-terminal .bright { + font-weight: bold; +} +.asciinema-terminal .underline { + text-decoration: underline; +} +.asciinema-terminal .italic { + font-style: italic; +} +.asciinema-terminal.font-small { + font-size: 12px; +} +.asciinema-terminal.font-medium { + font-size: 18px; +} +.asciinema-terminal.font-big { + font-size: 24px; +} +.asciinema-player .control-bar { + width: 100%; + height: 32px; + background: rgba(0, 0, 0, 0.8); + /* no gradient fallback */ + background: -moz-linear-gradient(top, rgba(0, 0, 0, 0.5) 0%, #000000 25%, #000000 100%); + /* FF3.6-15 */ + background: -webkit-linear-gradient(top, rgba(0, 0, 0, 0.5) 0%, #000000 25%, #000000 100%); + /* Chrome10-25,Safari5.1-6 */ + background: linear-gradient(to bottom, rgba(0, 0, 0, 0.5) 0%, #000000 25%, #000000 100%); + /* W3C, IE10+, FF16+, Chrome26+, Opera12+, Safari7+ */ + color: #bbbbbb; + box-sizing: content-box; + line-height: 1; + /* position: absolute; */ + bottom: -35px; + left: 0; + transition: bottom 0.15s linear; +} +.asciinema-player .control-bar * { + box-sizing: inherit; + font-size: 0; +} +.asciinema-player .control-bar svg.icon path { + fill: #bbbbbb; +} +.asciinema-player .control-bar .playback-button { + display: block; + float: left; + cursor: pointer; + height: 12px; + width: 12px; + padding: 10px; +} +.asciinema-player .control-bar .playback-button svg { + height: 12px; + width: 12px; +} +.asciinema-player .control-bar .timer { + display: block; + float: left; + width: 50px; + height: 100%; + text-align: center; + font-family: Helvetica, Arial, sans-serif; + font-size: 11px; + font-weight: bold; + line-height: 32px; + cursor: default; +} +.asciinema-player .control-bar .timer span { + display: inline-block; + font-size: inherit; +} +.asciinema-player .control-bar .timer .time-remaining { + display: none; +} +.asciinema-player .control-bar .timer:hover .time-elapsed { + display: none; +} +.asciinema-player 
.control-bar .timer:hover .time-remaining { + display: inline; +} +.asciinema-player .control-bar .progressbar { + display: block; + overflow: hidden; + height: 100%; + padding: 0 10px; +} +.asciinema-player .control-bar .progressbar .bar { + display: block; + cursor: pointer; + height: 100%; + padding-top: 15px; + font-size: 0; +} +.asciinema-player .control-bar .progressbar .bar .gutter { + display: block; + height: 3px; + background-color: #333; +} +.asciinema-player .control-bar .progressbar .bar .gutter span { + display: inline-block; + height: 100%; + background-color: #bbbbbb; + border-radius: 3px; +} +.asciinema-player .control-bar.live .progressbar .bar { + cursor: default; +} +.asciinema-player .control-bar .fullscreen-button { + display: block; + float: right; + width: 14px; + height: 14px; + padding: 9px; + cursor: pointer; +} +.asciinema-player .control-bar .fullscreen-button svg { + width: 14px; + height: 14px; +} +.asciinema-player .control-bar .fullscreen-button svg:first-child { + display: inline; +} +.asciinema-player .control-bar .fullscreen-button svg:last-child { + display: none; +} +.asciinema-player-wrapper.hud .control-bar { + bottom: 0px; +} +.asciinema-player-wrapper:fullscreen .fullscreen-button svg:first-child { + display: none; +} +.asciinema-player-wrapper:fullscreen .fullscreen-button svg:last-child { + display: inline; +} +.asciinema-player-wrapper:-webkit-full-screen .fullscreen-button svg:first-child { + display: none; +} +.asciinema-player-wrapper:-webkit-full-screen .fullscreen-button svg:last-child { + display: inline; +} +.asciinema-player-wrapper:-moz-full-screen .fullscreen-button svg:first-child { + display: none; +} +.asciinema-player-wrapper:-moz-full-screen .fullscreen-button svg:last-child { + display: inline; +} +.asciinema-player-wrapper:-ms-fullscreen .fullscreen-button svg:first-child { + display: none; +} +.asciinema-player-wrapper:-ms-fullscreen .fullscreen-button svg:last-child { + display: inline; +} 
+.asciinema-player .loading { + z-index: 10; + background-repeat: no-repeat; + background-position: center; + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 32px; + background-color: rgba(0, 0, 0, 0.5); +} +.asciinema-player .start-prompt { + z-index: 10; + background-repeat: no-repeat; + background-position: center; + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 32px; + z-index: 20; + cursor: pointer; +} +.asciinema-player .start-prompt .play-button { + font-size: 0px; +} +.asciinema-player .start-prompt .play-button { + position: absolute; + left: 0; + top: 0; + right: 0; + bottom: 0; + text-align: center; + color: white; + display: table; + width: 100%; + height: 100%; +} +.asciinema-player .start-prompt .play-button div { + vertical-align: middle; + display: table-cell; +} +.asciinema-player .start-prompt .play-button div span { + width: 96px; + height: 96px; + display: inline-block; +} +@-webkit-keyframes expand { + 0% { + -webkit-transform: scale(0); + } + 50% { + -webkit-transform: scale(1); + } + 100% { + z-index: 1; + } +} +@-moz-keyframes expand { + 0% { + -moz-transform: scale(0); + } + 50% { + -moz-transform: scale(1); + } + 100% { + z-index: 1; + } +} +@-o-keyframes expand { + 0% { + -o-transform: scale(0); + } + 50% { + -o-transform: scale(1); + } + 100% { + z-index: 1; + } +} +@keyframes expand { + 0% { + transform: scale(0); + } + 50% { + transform: scale(1); + } + 100% { + z-index: 1; + } +} +.loader { + position: absolute; + left: 50%; + top: 50%; + margin: -20px 0 0 -20px; + background-color: white; + border-radius: 50%; + box-shadow: 0 0 0 6.66667px #141414; + width: 40px; + height: 40px; +} +.loader:before, +.loader:after { + content: ""; + position: absolute; + left: 50%; + top: 50%; + display: block; + margin: -21px 0 0 -21px; + border-radius: 50%; + z-index: 2; + width: 42px; + height: 42px; +} +.loader:before { + background-color: #141414; + -webkit-animation: expand 1.6s linear infinite both; + 
-moz-animation: expand 1.6s linear infinite both; + animation: expand 1.6s linear infinite both; +} +.loader:after { + background-color: white; + -webkit-animation: expand 1.6s linear 0.8s infinite both; + -moz-animation: expand 1.6s linear 0.8s infinite both; + animation: expand 1.6s linear 0.8s infinite both; +} +.asciinema-terminal .fg-16 { + color: #000000; +} +.asciinema-terminal .bg-16 { + background-color: #000000; +} +.asciinema-terminal .fg-17 { + color: #00005f; +} +.asciinema-terminal .bg-17 { + background-color: #00005f; +} +.asciinema-terminal .fg-18 { + color: #000087; +} +.asciinema-terminal .bg-18 { + background-color: #000087; +} +.asciinema-terminal .fg-19 { + color: #0000af; +} +.asciinema-terminal .bg-19 { + background-color: #0000af; +} +.asciinema-terminal .fg-20 { + color: #0000d7; +} +.asciinema-terminal .bg-20 { + background-color: #0000d7; +} +.asciinema-terminal .fg-21 { + color: #0000ff; +} +.asciinema-terminal .bg-21 { + background-color: #0000ff; +} +.asciinema-terminal .fg-22 { + color: #005f00; +} +.asciinema-terminal .bg-22 { + background-color: #005f00; +} +.asciinema-terminal .fg-23 { + color: #005f5f; +} +.asciinema-terminal .bg-23 { + background-color: #005f5f; +} +.asciinema-terminal .fg-24 { + color: #005f87; +} +.asciinema-terminal .bg-24 { + background-color: #005f87; +} +.asciinema-terminal .fg-25 { + color: #005faf; +} +.asciinema-terminal .bg-25 { + background-color: #005faf; +} +.asciinema-terminal .fg-26 { + color: #005fd7; +} +.asciinema-terminal .bg-26 { + background-color: #005fd7; +} +.asciinema-terminal .fg-27 { + color: #005fff; +} +.asciinema-terminal .bg-27 { + background-color: #005fff; +} +.asciinema-terminal .fg-28 { + color: #008700; +} +.asciinema-terminal .bg-28 { + background-color: #008700; +} +.asciinema-terminal .fg-29 { + color: #00875f; +} +.asciinema-terminal .bg-29 { + background-color: #00875f; +} +.asciinema-terminal .fg-30 { + color: #008787; +} +.asciinema-terminal .bg-30 { + background-color: 
#008787; +} +.asciinema-terminal .fg-31 { + color: #0087af; +} +.asciinema-terminal .bg-31 { + background-color: #0087af; +} +.asciinema-terminal .fg-32 { + color: #0087d7; +} +.asciinema-terminal .bg-32 { + background-color: #0087d7; +} +.asciinema-terminal .fg-33 { + color: #0087ff; +} +.asciinema-terminal .bg-33 { + background-color: #0087ff; +} +.asciinema-terminal .fg-34 { + color: #00af00; +} +.asciinema-terminal .bg-34 { + background-color: #00af00; +} +.asciinema-terminal .fg-35 { + color: #00af5f; +} +.asciinema-terminal .bg-35 { + background-color: #00af5f; +} +.asciinema-terminal .fg-36 { + color: #00af87; +} +.asciinema-terminal .bg-36 { + background-color: #00af87; +} +.asciinema-terminal .fg-37 { + color: #00afaf; +} +.asciinema-terminal .bg-37 { + background-color: #00afaf; +} +.asciinema-terminal .fg-38 { + color: #00afd7; +} +.asciinema-terminal .bg-38 { + background-color: #00afd7; +} +.asciinema-terminal .fg-39 { + color: #00afff; +} +.asciinema-terminal .bg-39 { + background-color: #00afff; +} +.asciinema-terminal .fg-40 { + color: #00d700; +} +.asciinema-terminal .bg-40 { + background-color: #00d700; +} +.asciinema-terminal .fg-41 { + color: #00d75f; +} +.asciinema-terminal .bg-41 { + background-color: #00d75f; +} +.asciinema-terminal .fg-42 { + color: #00d787; +} +.asciinema-terminal .bg-42 { + background-color: #00d787; +} +.asciinema-terminal .fg-43 { + color: #00d7af; +} +.asciinema-terminal .bg-43 { + background-color: #00d7af; +} +.asciinema-terminal .fg-44 { + color: #00d7d7; +} +.asciinema-terminal .bg-44 { + background-color: #00d7d7; +} +.asciinema-terminal .fg-45 { + color: #00d7ff; +} +.asciinema-terminal .bg-45 { + background-color: #00d7ff; +} +.asciinema-terminal .fg-46 { + color: #00ff00; +} +.asciinema-terminal .bg-46 { + background-color: #00ff00; +} +.asciinema-terminal .fg-47 { + color: #00ff5f; +} +.asciinema-terminal .bg-47 { + background-color: #00ff5f; +} +.asciinema-terminal .fg-48 { + color: #00ff87; +} 
+.asciinema-terminal .bg-48 { + background-color: #00ff87; +} +.asciinema-terminal .fg-49 { + color: #00ffaf; +} +.asciinema-terminal .bg-49 { + background-color: #00ffaf; +} +.asciinema-terminal .fg-50 { + color: #00ffd7; +} +.asciinema-terminal .bg-50 { + background-color: #00ffd7; +} +.asciinema-terminal .fg-51 { + color: #00ffff; +} +.asciinema-terminal .bg-51 { + background-color: #00ffff; +} +.asciinema-terminal .fg-52 { + color: #5f0000; +} +.asciinema-terminal .bg-52 { + background-color: #5f0000; +} +.asciinema-terminal .fg-53 { + color: #5f005f; +} +.asciinema-terminal .bg-53 { + background-color: #5f005f; +} +.asciinema-terminal .fg-54 { + color: #5f0087; +} +.asciinema-terminal .bg-54 { + background-color: #5f0087; +} +.asciinema-terminal .fg-55 { + color: #5f00af; +} +.asciinema-terminal .bg-55 { + background-color: #5f00af; +} +.asciinema-terminal .fg-56 { + color: #5f00d7; +} +.asciinema-terminal .bg-56 { + background-color: #5f00d7; +} +.asciinema-terminal .fg-57 { + color: #5f00ff; +} +.asciinema-terminal .bg-57 { + background-color: #5f00ff; +} +.asciinema-terminal .fg-58 { + color: #5f5f00; +} +.asciinema-terminal .bg-58 { + background-color: #5f5f00; +} +.asciinema-terminal .fg-59 { + color: #5f5f5f; +} +.asciinema-terminal .bg-59 { + background-color: #5f5f5f; +} +.asciinema-terminal .fg-60 { + color: #5f5f87; +} +.asciinema-terminal .bg-60 { + background-color: #5f5f87; +} +.asciinema-terminal .fg-61 { + color: #5f5faf; +} +.asciinema-terminal .bg-61 { + background-color: #5f5faf; +} +.asciinema-terminal .fg-62 { + color: #5f5fd7; +} +.asciinema-terminal .bg-62 { + background-color: #5f5fd7; +} +.asciinema-terminal .fg-63 { + color: #5f5fff; +} +.asciinema-terminal .bg-63 { + background-color: #5f5fff; +} +.asciinema-terminal .fg-64 { + color: #5f8700; +} +.asciinema-terminal .bg-64 { + background-color: #5f8700; +} +.asciinema-terminal .fg-65 { + color: #5f875f; +} +.asciinema-terminal .bg-65 { + background-color: #5f875f; +} 
+.asciinema-terminal .fg-66 { + color: #5f8787; +} +.asciinema-terminal .bg-66 { + background-color: #5f8787; +} +.asciinema-terminal .fg-67 { + color: #5f87af; +} +.asciinema-terminal .bg-67 { + background-color: #5f87af; +} +.asciinema-terminal .fg-68 { + color: #5f87d7; +} +.asciinema-terminal .bg-68 { + background-color: #5f87d7; +} +.asciinema-terminal .fg-69 { + color: #5f87ff; +} +.asciinema-terminal .bg-69 { + background-color: #5f87ff; +} +.asciinema-terminal .fg-70 { + color: #5faf00; +} +.asciinema-terminal .bg-70 { + background-color: #5faf00; +} +.asciinema-terminal .fg-71 { + color: #5faf5f; +} +.asciinema-terminal .bg-71 { + background-color: #5faf5f; +} +.asciinema-terminal .fg-72 { + color: #5faf87; +} +.asciinema-terminal .bg-72 { + background-color: #5faf87; +} +.asciinema-terminal .fg-73 { + color: #5fafaf; +} +.asciinema-terminal .bg-73 { + background-color: #5fafaf; +} +.asciinema-terminal .fg-74 { + color: #5fafd7; +} +.asciinema-terminal .bg-74 { + background-color: #5fafd7; +} +.asciinema-terminal .fg-75 { + color: #5fafff; +} +.asciinema-terminal .bg-75 { + background-color: #5fafff; +} +.asciinema-terminal .fg-76 { + color: #5fd700; +} +.asciinema-terminal .bg-76 { + background-color: #5fd700; +} +.asciinema-terminal .fg-77 { + color: #5fd75f; +} +.asciinema-terminal .bg-77 { + background-color: #5fd75f; +} +.asciinema-terminal .fg-78 { + color: #5fd787; +} +.asciinema-terminal .bg-78 { + background-color: #5fd787; +} +.asciinema-terminal .fg-79 { + color: #5fd7af; +} +.asciinema-terminal .bg-79 { + background-color: #5fd7af; +} +.asciinema-terminal .fg-80 { + color: #5fd7d7; +} +.asciinema-terminal .bg-80 { + background-color: #5fd7d7; +} +.asciinema-terminal .fg-81 { + color: #5fd7ff; +} +.asciinema-terminal .bg-81 { + background-color: #5fd7ff; +} +.asciinema-terminal .fg-82 { + color: #5fff00; +} +.asciinema-terminal .bg-82 { + background-color: #5fff00; +} +.asciinema-terminal .fg-83 { + color: #5fff5f; +} +.asciinema-terminal .bg-83 
{ + background-color: #5fff5f; +} +.asciinema-terminal .fg-84 { + color: #5fff87; +} +.asciinema-terminal .bg-84 { + background-color: #5fff87; +} +.asciinema-terminal .fg-85 { + color: #5fffaf; +} +.asciinema-terminal .bg-85 { + background-color: #5fffaf; +} +.asciinema-terminal .fg-86 { + color: #5fffd7; +} +.asciinema-terminal .bg-86 { + background-color: #5fffd7; +} +.asciinema-terminal .fg-87 { + color: #5fffff; +} +.asciinema-terminal .bg-87 { + background-color: #5fffff; +} +.asciinema-terminal .fg-88 { + color: #870000; +} +.asciinema-terminal .bg-88 { + background-color: #870000; +} +.asciinema-terminal .fg-89 { + color: #87005f; +} +.asciinema-terminal .bg-89 { + background-color: #87005f; +} +.asciinema-terminal .fg-90 { + color: #870087; +} +.asciinema-terminal .bg-90 { + background-color: #870087; +} +.asciinema-terminal .fg-91 { + color: #8700af; +} +.asciinema-terminal .bg-91 { + background-color: #8700af; +} +.asciinema-terminal .fg-92 { + color: #8700d7; +} +.asciinema-terminal .bg-92 { + background-color: #8700d7; +} +.asciinema-terminal .fg-93 { + color: #8700ff; +} +.asciinema-terminal .bg-93 { + background-color: #8700ff; +} +.asciinema-terminal .fg-94 { + color: #875f00; +} +.asciinema-terminal .bg-94 { + background-color: #875f00; +} +.asciinema-terminal .fg-95 { + color: #875f5f; +} +.asciinema-terminal .bg-95 { + background-color: #875f5f; +} +.asciinema-terminal .fg-96 { + color: #875f87; +} +.asciinema-terminal .bg-96 { + background-color: #875f87; +} +.asciinema-terminal .fg-97 { + color: #875faf; +} +.asciinema-terminal .bg-97 { + background-color: #875faf; +} +.asciinema-terminal .fg-98 { + color: #875fd7; +} +.asciinema-terminal .bg-98 { + background-color: #875fd7; +} +.asciinema-terminal .fg-99 { + color: #875fff; +} +.asciinema-terminal .bg-99 { + background-color: #875fff; +} +.asciinema-terminal .fg-100 { + color: #878700; +} +.asciinema-terminal .bg-100 { + background-color: #878700; +} +.asciinema-terminal .fg-101 { + color: 
#87875f; +} +.asciinema-terminal .bg-101 { + background-color: #87875f; +} +.asciinema-terminal .fg-102 { + color: #878787; +} +.asciinema-terminal .bg-102 { + background-color: #878787; +} +.asciinema-terminal .fg-103 { + color: #8787af; +} +.asciinema-terminal .bg-103 { + background-color: #8787af; +} +.asciinema-terminal .fg-104 { + color: #8787d7; +} +.asciinema-terminal .bg-104 { + background-color: #8787d7; +} +.asciinema-terminal .fg-105 { + color: #8787ff; +} +.asciinema-terminal .bg-105 { + background-color: #8787ff; +} +.asciinema-terminal .fg-106 { + color: #87af00; +} +.asciinema-terminal .bg-106 { + background-color: #87af00; +} +.asciinema-terminal .fg-107 { + color: #87af5f; +} +.asciinema-terminal .bg-107 { + background-color: #87af5f; +} +.asciinema-terminal .fg-108 { + color: #87af87; +} +.asciinema-terminal .bg-108 { + background-color: #87af87; +} +.asciinema-terminal .fg-109 { + color: #87afaf; +} +.asciinema-terminal .bg-109 { + background-color: #87afaf; +} +.asciinema-terminal .fg-110 { + color: #87afd7; +} +.asciinema-terminal .bg-110 { + background-color: #87afd7; +} +.asciinema-terminal .fg-111 { + color: #87afff; +} +.asciinema-terminal .bg-111 { + background-color: #87afff; +} +.asciinema-terminal .fg-112 { + color: #87d700; +} +.asciinema-terminal .bg-112 { + background-color: #87d700; +} +.asciinema-terminal .fg-113 { + color: #87d75f; +} +.asciinema-terminal .bg-113 { + background-color: #87d75f; +} +.asciinema-terminal .fg-114 { + color: #87d787; +} +.asciinema-terminal .bg-114 { + background-color: #87d787; +} +.asciinema-terminal .fg-115 { + color: #87d7af; +} +.asciinema-terminal .bg-115 { + background-color: #87d7af; +} +.asciinema-terminal .fg-116 { + color: #87d7d7; +} +.asciinema-terminal .bg-116 { + background-color: #87d7d7; +} +.asciinema-terminal .fg-117 { + color: #87d7ff; +} +.asciinema-terminal .bg-117 { + background-color: #87d7ff; +} +.asciinema-terminal .fg-118 { + color: #87ff00; +} +.asciinema-terminal .bg-118 { + 
background-color: #87ff00; +} +.asciinema-terminal .fg-119 { + color: #87ff5f; +} +.asciinema-terminal .bg-119 { + background-color: #87ff5f; +} +.asciinema-terminal .fg-120 { + color: #87ff87; +} +.asciinema-terminal .bg-120 { + background-color: #87ff87; +} +.asciinema-terminal .fg-121 { + color: #87ffaf; +} +.asciinema-terminal .bg-121 { + background-color: #87ffaf; +} +.asciinema-terminal .fg-122 { + color: #87ffd7; +} +.asciinema-terminal .bg-122 { + background-color: #87ffd7; +} +.asciinema-terminal .fg-123 { + color: #87ffff; +} +.asciinema-terminal .bg-123 { + background-color: #87ffff; +} +.asciinema-terminal .fg-124 { + color: #af0000; +} +.asciinema-terminal .bg-124 { + background-color: #af0000; +} +.asciinema-terminal .fg-125 { + color: #af005f; +} +.asciinema-terminal .bg-125 { + background-color: #af005f; +} +.asciinema-terminal .fg-126 { + color: #af0087; +} +.asciinema-terminal .bg-126 { + background-color: #af0087; +} +.asciinema-terminal .fg-127 { + color: #af00af; +} +.asciinema-terminal .bg-127 { + background-color: #af00af; +} +.asciinema-terminal .fg-128 { + color: #af00d7; +} +.asciinema-terminal .bg-128 { + background-color: #af00d7; +} +.asciinema-terminal .fg-129 { + color: #af00ff; +} +.asciinema-terminal .bg-129 { + background-color: #af00ff; +} +.asciinema-terminal .fg-130 { + color: #af5f00; +} +.asciinema-terminal .bg-130 { + background-color: #af5f00; +} +.asciinema-terminal .fg-131 { + color: #af5f5f; +} +.asciinema-terminal .bg-131 { + background-color: #af5f5f; +} +.asciinema-terminal .fg-132 { + color: #af5f87; +} +.asciinema-terminal .bg-132 { + background-color: #af5f87; +} +.asciinema-terminal .fg-133 { + color: #af5faf; +} +.asciinema-terminal .bg-133 { + background-color: #af5faf; +} +.asciinema-terminal .fg-134 { + color: #af5fd7; +} +.asciinema-terminal .bg-134 { + background-color: #af5fd7; +} +.asciinema-terminal .fg-135 { + color: #af5fff; +} +.asciinema-terminal .bg-135 { + background-color: #af5fff; +} 
+.asciinema-terminal .fg-136 { + color: #af8700; +} +.asciinema-terminal .bg-136 { + background-color: #af8700; +} +.asciinema-terminal .fg-137 { + color: #af875f; +} +.asciinema-terminal .bg-137 { + background-color: #af875f; +} +.asciinema-terminal .fg-138 { + color: #af8787; +} +.asciinema-terminal .bg-138 { + background-color: #af8787; +} +.asciinema-terminal .fg-139 { + color: #af87af; +} +.asciinema-terminal .bg-139 { + background-color: #af87af; +} +.asciinema-terminal .fg-140 { + color: #af87d7; +} +.asciinema-terminal .bg-140 { + background-color: #af87d7; +} +.asciinema-terminal .fg-141 { + color: #af87ff; +} +.asciinema-terminal .bg-141 { + background-color: #af87ff; +} +.asciinema-terminal .fg-142 { + color: #afaf00; +} +.asciinema-terminal .bg-142 { + background-color: #afaf00; +} +.asciinema-terminal .fg-143 { + color: #afaf5f; +} +.asciinema-terminal .bg-143 { + background-color: #afaf5f; +} +.asciinema-terminal .fg-144 { + color: #afaf87; +} +.asciinema-terminal .bg-144 { + background-color: #afaf87; +} +.asciinema-terminal .fg-145 { + color: #afafaf; +} +.asciinema-terminal .bg-145 { + background-color: #afafaf; +} +.asciinema-terminal .fg-146 { + color: #afafd7; +} +.asciinema-terminal .bg-146 { + background-color: #afafd7; +} +.asciinema-terminal .fg-147 { + color: #afafff; +} +.asciinema-terminal .bg-147 { + background-color: #afafff; +} +.asciinema-terminal .fg-148 { + color: #afd700; +} +.asciinema-terminal .bg-148 { + background-color: #afd700; +} +.asciinema-terminal .fg-149 { + color: #afd75f; +} +.asciinema-terminal .bg-149 { + background-color: #afd75f; +} +.asciinema-terminal .fg-150 { + color: #afd787; +} +.asciinema-terminal .bg-150 { + background-color: #afd787; +} +.asciinema-terminal .fg-151 { + color: #afd7af; +} +.asciinema-terminal .bg-151 { + background-color: #afd7af; +} +.asciinema-terminal .fg-152 { + color: #afd7d7; +} +.asciinema-terminal .bg-152 { + background-color: #afd7d7; +} +.asciinema-terminal .fg-153 { + color: 
#afd7ff; +} +.asciinema-terminal .bg-153 { + background-color: #afd7ff; +} +.asciinema-terminal .fg-154 { + color: #afff00; +} +.asciinema-terminal .bg-154 { + background-color: #afff00; +} +.asciinema-terminal .fg-155 { + color: #afff5f; +} +.asciinema-terminal .bg-155 { + background-color: #afff5f; +} +.asciinema-terminal .fg-156 { + color: #afff87; +} +.asciinema-terminal .bg-156 { + background-color: #afff87; +} +.asciinema-terminal .fg-157 { + color: #afffaf; +} +.asciinema-terminal .bg-157 { + background-color: #afffaf; +} +.asciinema-terminal .fg-158 { + color: #afffd7; +} +.asciinema-terminal .bg-158 { + background-color: #afffd7; +} +.asciinema-terminal .fg-159 { + color: #afffff; +} +.asciinema-terminal .bg-159 { + background-color: #afffff; +} +.asciinema-terminal .fg-160 { + color: #d70000; +} +.asciinema-terminal .bg-160 { + background-color: #d70000; +} +.asciinema-terminal .fg-161 { + color: #d7005f; +} +.asciinema-terminal .bg-161 { + background-color: #d7005f; +} +.asciinema-terminal .fg-162 { + color: #d70087; +} +.asciinema-terminal .bg-162 { + background-color: #d70087; +} +.asciinema-terminal .fg-163 { + color: #d700af; +} +.asciinema-terminal .bg-163 { + background-color: #d700af; +} +.asciinema-terminal .fg-164 { + color: #d700d7; +} +.asciinema-terminal .bg-164 { + background-color: #d700d7; +} +.asciinema-terminal .fg-165 { + color: #d700ff; +} +.asciinema-terminal .bg-165 { + background-color: #d700ff; +} +.asciinema-terminal .fg-166 { + color: #d75f00; +} +.asciinema-terminal .bg-166 { + background-color: #d75f00; +} +.asciinema-terminal .fg-167 { + color: #d75f5f; +} +.asciinema-terminal .bg-167 { + background-color: #d75f5f; +} +.asciinema-terminal .fg-168 { + color: #d75f87; +} +.asciinema-terminal .bg-168 { + background-color: #d75f87; +} +.asciinema-terminal .fg-169 { + color: #d75faf; +} +.asciinema-terminal .bg-169 { + background-color: #d75faf; +} +.asciinema-terminal .fg-170 { + color: #d75fd7; +} +.asciinema-terminal .bg-170 { + 
background-color: #d75fd7; +} +.asciinema-terminal .fg-171 { + color: #d75fff; +} +.asciinema-terminal .bg-171 { + background-color: #d75fff; +} +.asciinema-terminal .fg-172 { + color: #d78700; +} +.asciinema-terminal .bg-172 { + background-color: #d78700; +} +.asciinema-terminal .fg-173 { + color: #d7875f; +} +.asciinema-terminal .bg-173 { + background-color: #d7875f; +} +.asciinema-terminal .fg-174 { + color: #d78787; +} +.asciinema-terminal .bg-174 { + background-color: #d78787; +} +.asciinema-terminal .fg-175 { + color: #d787af; +} +.asciinema-terminal .bg-175 { + background-color: #d787af; +} +.asciinema-terminal .fg-176 { + color: #d787d7; +} +.asciinema-terminal .bg-176 { + background-color: #d787d7; +} +.asciinema-terminal .fg-177 { + color: #d787ff; +} +.asciinema-terminal .bg-177 { + background-color: #d787ff; +} +.asciinema-terminal .fg-178 { + color: #d7af00; +} +.asciinema-terminal .bg-178 { + background-color: #d7af00; +} +.asciinema-terminal .fg-179 { + color: #d7af5f; +} +.asciinema-terminal .bg-179 { + background-color: #d7af5f; +} +.asciinema-terminal .fg-180 { + color: #d7af87; +} +.asciinema-terminal .bg-180 { + background-color: #d7af87; +} +.asciinema-terminal .fg-181 { + color: #d7afaf; +} +.asciinema-terminal .bg-181 { + background-color: #d7afaf; +} +.asciinema-terminal .fg-182 { + color: #d7afd7; +} +.asciinema-terminal .bg-182 { + background-color: #d7afd7; +} +.asciinema-terminal .fg-183 { + color: #d7afff; +} +.asciinema-terminal .bg-183 { + background-color: #d7afff; +} +.asciinema-terminal .fg-184 { + color: #d7d700; +} +.asciinema-terminal .bg-184 { + background-color: #d7d700; +} +.asciinema-terminal .fg-185 { + color: #d7d75f; +} +.asciinema-terminal .bg-185 { + background-color: #d7d75f; +} +.asciinema-terminal .fg-186 { + color: #d7d787; +} +.asciinema-terminal .bg-186 { + background-color: #d7d787; +} +.asciinema-terminal .fg-187 { + color: #d7d7af; +} +.asciinema-terminal .bg-187 { + background-color: #d7d7af; +} 
+.asciinema-terminal .fg-188 { + color: #d7d7d7; +} +.asciinema-terminal .bg-188 { + background-color: #d7d7d7; +} +.asciinema-terminal .fg-189 { + color: #d7d7ff; +} +.asciinema-terminal .bg-189 { + background-color: #d7d7ff; +} +.asciinema-terminal .fg-190 { + color: #d7ff00; +} +.asciinema-terminal .bg-190 { + background-color: #d7ff00; +} +.asciinema-terminal .fg-191 { + color: #d7ff5f; +} +.asciinema-terminal .bg-191 { + background-color: #d7ff5f; +} +.asciinema-terminal .fg-192 { + color: #d7ff87; +} +.asciinema-terminal .bg-192 { + background-color: #d7ff87; +} +.asciinema-terminal .fg-193 { + color: #d7ffaf; +} +.asciinema-terminal .bg-193 { + background-color: #d7ffaf; +} +.asciinema-terminal .fg-194 { + color: #d7ffd7; +} +.asciinema-terminal .bg-194 { + background-color: #d7ffd7; +} +.asciinema-terminal .fg-195 { + color: #d7ffff; +} +.asciinema-terminal .bg-195 { + background-color: #d7ffff; +} +.asciinema-terminal .fg-196 { + color: #ff0000; +} +.asciinema-terminal .bg-196 { + background-color: #ff0000; +} +.asciinema-terminal .fg-197 { + color: #ff005f; +} +.asciinema-terminal .bg-197 { + background-color: #ff005f; +} +.asciinema-terminal .fg-198 { + color: #ff0087; +} +.asciinema-terminal .bg-198 { + background-color: #ff0087; +} +.asciinema-terminal .fg-199 { + color: #ff00af; +} +.asciinema-terminal .bg-199 { + background-color: #ff00af; +} +.asciinema-terminal .fg-200 { + color: #ff00d7; +} +.asciinema-terminal .bg-200 { + background-color: #ff00d7; +} +.asciinema-terminal .fg-201 { + color: #ff00ff; +} +.asciinema-terminal .bg-201 { + background-color: #ff00ff; +} +.asciinema-terminal .fg-202 { + color: #ff5f00; +} +.asciinema-terminal .bg-202 { + background-color: #ff5f00; +} +.asciinema-terminal .fg-203 { + color: #ff5f5f; +} +.asciinema-terminal .bg-203 { + background-color: #ff5f5f; +} +.asciinema-terminal .fg-204 { + color: #ff5f87; +} +.asciinema-terminal .bg-204 { + background-color: #ff5f87; +} +.asciinema-terminal .fg-205 { + color: 
#ff5faf; +} +.asciinema-terminal .bg-205 { + background-color: #ff5faf; +} +.asciinema-terminal .fg-206 { + color: #ff5fd7; +} +.asciinema-terminal .bg-206 { + background-color: #ff5fd7; +} +.asciinema-terminal .fg-207 { + color: #ff5fff; +} +.asciinema-terminal .bg-207 { + background-color: #ff5fff; +} +.asciinema-terminal .fg-208 { + color: #ff8700; +} +.asciinema-terminal .bg-208 { + background-color: #ff8700; +} +.asciinema-terminal .fg-209 { + color: #ff875f; +} +.asciinema-terminal .bg-209 { + background-color: #ff875f; +} +.asciinema-terminal .fg-210 { + color: #ff8787; +} +.asciinema-terminal .bg-210 { + background-color: #ff8787; +} +.asciinema-terminal .fg-211 { + color: #ff87af; +} +.asciinema-terminal .bg-211 { + background-color: #ff87af; +} +.asciinema-terminal .fg-212 { + color: #ff87d7; +} +.asciinema-terminal .bg-212 { + background-color: #ff87d7; +} +.asciinema-terminal .fg-213 { + color: #ff87ff; +} +.asciinema-terminal .bg-213 { + background-color: #ff87ff; +} +.asciinema-terminal .fg-214 { + color: #ffaf00; +} +.asciinema-terminal .bg-214 { + background-color: #ffaf00; +} +.asciinema-terminal .fg-215 { + color: #ffaf5f; +} +.asciinema-terminal .bg-215 { + background-color: #ffaf5f; +} +.asciinema-terminal .fg-216 { + color: #ffaf87; +} +.asciinema-terminal .bg-216 { + background-color: #ffaf87; +} +.asciinema-terminal .fg-217 { + color: #ffafaf; +} +.asciinema-terminal .bg-217 { + background-color: #ffafaf; +} +.asciinema-terminal .fg-218 { + color: #ffafd7; +} +.asciinema-terminal .bg-218 { + background-color: #ffafd7; +} +.asciinema-terminal .fg-219 { + color: #ffafff; +} +.asciinema-terminal .bg-219 { + background-color: #ffafff; +} +.asciinema-terminal .fg-220 { + color: #ffd700; +} +.asciinema-terminal .bg-220 { + background-color: #ffd700; +} +.asciinema-terminal .fg-221 { + color: #ffd75f; +} +.asciinema-terminal .bg-221 { + background-color: #ffd75f; +} +.asciinema-terminal .fg-222 { + color: #ffd787; +} +.asciinema-terminal .bg-222 { + 
background-color: #ffd787; +} +.asciinema-terminal .fg-223 { + color: #ffd7af; +} +.asciinema-terminal .bg-223 { + background-color: #ffd7af; +} +.asciinema-terminal .fg-224 { + color: #ffd7d7; +} +.asciinema-terminal .bg-224 { + background-color: #ffd7d7; +} +.asciinema-terminal .fg-225 { + color: #ffd7ff; +} +.asciinema-terminal .bg-225 { + background-color: #ffd7ff; +} +.asciinema-terminal .fg-226 { + color: #ffff00; +} +.asciinema-terminal .bg-226 { + background-color: #ffff00; +} +.asciinema-terminal .fg-227 { + color: #ffff5f; +} +.asciinema-terminal .bg-227 { + background-color: #ffff5f; +} +.asciinema-terminal .fg-228 { + color: #ffff87; +} +.asciinema-terminal .bg-228 { + background-color: #ffff87; +} +.asciinema-terminal .fg-229 { + color: #ffffaf; +} +.asciinema-terminal .bg-229 { + background-color: #ffffaf; +} +.asciinema-terminal .fg-230 { + color: #ffffd7; +} +.asciinema-terminal .bg-230 { + background-color: #ffffd7; +} +.asciinema-terminal .fg-231 { + color: #ffffff; +} +.asciinema-terminal .bg-231 { + background-color: #ffffff; +} +.asciinema-terminal .fg-232 { + color: #080808; +} +.asciinema-terminal .bg-232 { + background-color: #080808; +} +.asciinema-terminal .fg-233 { + color: #121212; +} +.asciinema-terminal .bg-233 { + background-color: #121212; +} +.asciinema-terminal .fg-234 { + color: #1c1c1c; +} +.asciinema-terminal .bg-234 { + background-color: #1c1c1c; +} +.asciinema-terminal .fg-235 { + color: #262626; +} +.asciinema-terminal .bg-235 { + background-color: #262626; +} +.asciinema-terminal .fg-236 { + color: #303030; +} +.asciinema-terminal .bg-236 { + background-color: #303030; +} +.asciinema-terminal .fg-237 { + color: #3a3a3a; +} +.asciinema-terminal .bg-237 { + background-color: #3a3a3a; +} +.asciinema-terminal .fg-238 { + color: #444444; +} +.asciinema-terminal .bg-238 { + background-color: #444444; +} +.asciinema-terminal .fg-239 { + color: #4e4e4e; +} +.asciinema-terminal .bg-239 { + background-color: #4e4e4e; +} 
+.asciinema-terminal .fg-240 { + color: #585858; +} +.asciinema-terminal .bg-240 { + background-color: #585858; +} +.asciinema-terminal .fg-241 { + color: #626262; +} +.asciinema-terminal .bg-241 { + background-color: #626262; +} +.asciinema-terminal .fg-242 { + color: #6c6c6c; +} +.asciinema-terminal .bg-242 { + background-color: #6c6c6c; +} +.asciinema-terminal .fg-243 { + color: #767676; +} +.asciinema-terminal .bg-243 { + background-color: #767676; +} +.asciinema-terminal .fg-244 { + color: #808080; +} +.asciinema-terminal .bg-244 { + background-color: #808080; +} +.asciinema-terminal .fg-245 { + color: #8a8a8a; +} +.asciinema-terminal .bg-245 { + background-color: #8a8a8a; +} +.asciinema-terminal .fg-246 { + color: #949494; +} +.asciinema-terminal .bg-246 { + background-color: #949494; +} +.asciinema-terminal .fg-247 { + color: #9e9e9e; +} +.asciinema-terminal .bg-247 { + background-color: #9e9e9e; +} +.asciinema-terminal .fg-248 { + color: #a8a8a8; +} +.asciinema-terminal .bg-248 { + background-color: #a8a8a8; +} +.asciinema-terminal .fg-249 { + color: #b2b2b2; +} +.asciinema-terminal .bg-249 { + background-color: #b2b2b2; +} +.asciinema-terminal .fg-250 { + color: #bcbcbc; +} +.asciinema-terminal .bg-250 { + background-color: #bcbcbc; +} +.asciinema-terminal .fg-251 { + color: #c6c6c6; +} +.asciinema-terminal .bg-251 { + background-color: #c6c6c6; +} +.asciinema-terminal .fg-252 { + color: #d0d0d0; +} +.asciinema-terminal .bg-252 { + background-color: #d0d0d0; +} +.asciinema-terminal .fg-253 { + color: #dadada; +} +.asciinema-terminal .bg-253 { + background-color: #dadada; +} +.asciinema-terminal .fg-254 { + color: #e4e4e4; +} +.asciinema-terminal .bg-254 { + background-color: #e4e4e4; +} +.asciinema-terminal .fg-255 { + color: #eeeeee; +} +.asciinema-terminal .bg-255 { + background-color: #eeeeee; +} +.asciinema-theme-asciinema .asciinema-terminal { + color: #cccccc; + background-color: #121314; + border-color: #121314; +} +.asciinema-theme-asciinema .fg-bg 
{ + color: #121314; +} +.asciinema-theme-asciinema .bg-fg { + background-color: #cccccc; +} +.asciinema-theme-asciinema .fg-0 { + color: #000000; +} +.asciinema-theme-asciinema .bg-0 { + background-color: #000000; +} +.asciinema-theme-asciinema .fg-1 { + color: #dd3c69; +} +.asciinema-theme-asciinema .bg-1 { + background-color: #dd3c69; +} +.asciinema-theme-asciinema .fg-2 { + color: #4ebf22; +} +.asciinema-theme-asciinema .bg-2 { + background-color: #4ebf22; +} +.asciinema-theme-asciinema .fg-3 { + color: #ddaf3c; +} +.asciinema-theme-asciinema .bg-3 { + background-color: #ddaf3c; +} +.asciinema-theme-asciinema .fg-4 { + color: #26b0d7; +} +.asciinema-theme-asciinema .bg-4 { + background-color: #26b0d7; +} +.asciinema-theme-asciinema .fg-5 { + color: #b954e1; +} +.asciinema-theme-asciinema .bg-5 { + background-color: #b954e1; +} +.asciinema-theme-asciinema .fg-6 { + color: #54e1b9; +} +.asciinema-theme-asciinema .bg-6 { + background-color: #54e1b9; +} +.asciinema-theme-asciinema .fg-7 { + color: #d9d9d9; +} +.asciinema-theme-asciinema .bg-7 { + background-color: #d9d9d9; +} +.asciinema-theme-asciinema .fg-8 { + color: #4d4d4d; +} +.asciinema-theme-asciinema .bg-8 { + background-color: #4d4d4d; +} +.asciinema-theme-asciinema .fg-9 { + color: #dd3c69; +} +.asciinema-theme-asciinema .bg-9 { + background-color: #dd3c69; +} +.asciinema-theme-asciinema .fg-10 { + color: #4ebf22; +} +.asciinema-theme-asciinema .bg-10 { + background-color: #4ebf22; +} +.asciinema-theme-asciinema .fg-11 { + color: #ddaf3c; +} +.asciinema-theme-asciinema .bg-11 { + background-color: #ddaf3c; +} +.asciinema-theme-asciinema .fg-12 { + color: #26b0d7; +} +.asciinema-theme-asciinema .bg-12 { + background-color: #26b0d7; +} +.asciinema-theme-asciinema .fg-13 { + color: #b954e1; +} +.asciinema-theme-asciinema .bg-13 { + background-color: #b954e1; +} +.asciinema-theme-asciinema .fg-14 { + color: #54e1b9; +} +.asciinema-theme-asciinema .bg-14 { + background-color: #54e1b9; +} 
+.asciinema-theme-asciinema .fg-15 { + color: #ffffff; +} +.asciinema-theme-asciinema .bg-15 { + background-color: #ffffff; +} +.asciinema-theme-asciinema .fg-8, +.asciinema-theme-asciinema .fg-9, +.asciinema-theme-asciinema .fg-10, +.asciinema-theme-asciinema .fg-11, +.asciinema-theme-asciinema .fg-12, +.asciinema-theme-asciinema .fg-13, +.asciinema-theme-asciinema .fg-14, +.asciinema-theme-asciinema .fg-15 { + font-weight: bold; +} +.asciinema-theme-tango .asciinema-terminal { + color: #cccccc; + background-color: #121314; + border-color: #121314; +} +.asciinema-theme-tango .fg-bg { + color: #121314; +} +.asciinema-theme-tango .bg-fg { + background-color: #cccccc; +} +.asciinema-theme-tango .fg-0 { + color: #000000; +} +.asciinema-theme-tango .bg-0 { + background-color: #000000; +} +.asciinema-theme-tango .fg-1 { + color: #cc0000; +} +.asciinema-theme-tango .bg-1 { + background-color: #cc0000; +} +.asciinema-theme-tango .fg-2 { + color: #4e9a06; +} +.asciinema-theme-tango .bg-2 { + background-color: #4e9a06; +} +.asciinema-theme-tango .fg-3 { + color: #c4a000; +} +.asciinema-theme-tango .bg-3 { + background-color: #c4a000; +} +.asciinema-theme-tango .fg-4 { + color: #3465a4; +} +.asciinema-theme-tango .bg-4 { + background-color: #3465a4; +} +.asciinema-theme-tango .fg-5 { + color: #75507b; +} +.asciinema-theme-tango .bg-5 { + background-color: #75507b; +} +.asciinema-theme-tango .fg-6 { + color: #06989a; +} +.asciinema-theme-tango .bg-6 { + background-color: #06989a; +} +.asciinema-theme-tango .fg-7 { + color: #d3d7cf; +} +.asciinema-theme-tango .bg-7 { + background-color: #d3d7cf; +} +.asciinema-theme-tango .fg-8 { + color: #555753; +} +.asciinema-theme-tango .bg-8 { + background-color: #555753; +} +.asciinema-theme-tango .fg-9 { + color: #ef2929; +} +.asciinema-theme-tango .bg-9 { + background-color: #ef2929; +} +.asciinema-theme-tango .fg-10 { + color: #8ae234; +} +.asciinema-theme-tango .bg-10 { + background-color: #8ae234; +} +.asciinema-theme-tango .fg-11 { 
+ color: #fce94f; +} +.asciinema-theme-tango .bg-11 { + background-color: #fce94f; +} +.asciinema-theme-tango .fg-12 { + color: #729fcf; +} +.asciinema-theme-tango .bg-12 { + background-color: #729fcf; +} +.asciinema-theme-tango .fg-13 { + color: #ad7fa8; +} +.asciinema-theme-tango .bg-13 { + background-color: #ad7fa8; +} +.asciinema-theme-tango .fg-14 { + color: #34e2e2; +} +.asciinema-theme-tango .bg-14 { + background-color: #34e2e2; +} +.asciinema-theme-tango .fg-15 { + color: #eeeeec; +} +.asciinema-theme-tango .bg-15 { + background-color: #eeeeec; +} +.asciinema-theme-tango .fg-8, +.asciinema-theme-tango .fg-9, +.asciinema-theme-tango .fg-10, +.asciinema-theme-tango .fg-11, +.asciinema-theme-tango .fg-12, +.asciinema-theme-tango .fg-13, +.asciinema-theme-tango .fg-14, +.asciinema-theme-tango .fg-15 { + font-weight: bold; +} +.asciinema-theme-solarized-dark .asciinema-terminal { + color: #839496; + background-color: #002b36; + border-color: #002b36; +} +.asciinema-theme-solarized-dark .fg-bg { + color: #002b36; +} +.asciinema-theme-solarized-dark .bg-fg { + background-color: #839496; +} +.asciinema-theme-solarized-dark .fg-0 { + color: #073642; +} +.asciinema-theme-solarized-dark .bg-0 { + background-color: #073642; +} +.asciinema-theme-solarized-dark .fg-1 { + color: #dc322f; +} +.asciinema-theme-solarized-dark .bg-1 { + background-color: #dc322f; +} +.asciinema-theme-solarized-dark .fg-2 { + color: #859900; +} +.asciinema-theme-solarized-dark .bg-2 { + background-color: #859900; +} +.asciinema-theme-solarized-dark .fg-3 { + color: #b58900; +} +.asciinema-theme-solarized-dark .bg-3 { + background-color: #b58900; +} +.asciinema-theme-solarized-dark .fg-4 { + color: #268bd2; +} +.asciinema-theme-solarized-dark .bg-4 { + background-color: #268bd2; +} +.asciinema-theme-solarized-dark .fg-5 { + color: #d33682; +} +.asciinema-theme-solarized-dark .bg-5 { + background-color: #d33682; +} +.asciinema-theme-solarized-dark .fg-6 { + color: #2aa198; +} 
+.asciinema-theme-solarized-dark .bg-6 { + background-color: #2aa198; +} +.asciinema-theme-solarized-dark .fg-7 { + color: #eee8d5; +} +.asciinema-theme-solarized-dark .bg-7 { + background-color: #eee8d5; +} +.asciinema-theme-solarized-dark .fg-8 { + color: #002b36; +} +.asciinema-theme-solarized-dark .bg-8 { + background-color: #002b36; +} +.asciinema-theme-solarized-dark .fg-9 { + color: #cb4b16; +} +.asciinema-theme-solarized-dark .bg-9 { + background-color: #cb4b16; +} +.asciinema-theme-solarized-dark .fg-10 { + color: #586e75; +} +.asciinema-theme-solarized-dark .bg-10 { + background-color: #586e75; +} +.asciinema-theme-solarized-dark .fg-11 { + color: #657b83; +} +.asciinema-theme-solarized-dark .bg-11 { + background-color: #657b83; +} +.asciinema-theme-solarized-dark .fg-12 { + color: #839496; +} +.asciinema-theme-solarized-dark .bg-12 { + background-color: #839496; +} +.asciinema-theme-solarized-dark .fg-13 { + color: #6c71c4; +} +.asciinema-theme-solarized-dark .bg-13 { + background-color: #6c71c4; +} +.asciinema-theme-solarized-dark .fg-14 { + color: #93a1a1; +} +.asciinema-theme-solarized-dark .bg-14 { + background-color: #93a1a1; +} +.asciinema-theme-solarized-dark .fg-15 { + color: #fdf6e3; +} +.asciinema-theme-solarized-dark .bg-15 { + background-color: #fdf6e3; +} +.asciinema-theme-solarized-light .asciinema-terminal { + color: #657b83; + background-color: #fdf6e3; + border-color: #fdf6e3; +} +.asciinema-theme-solarized-light .fg-bg { + color: #fdf6e3; +} +.asciinema-theme-solarized-light .bg-fg { + background-color: #657b83; +} +.asciinema-theme-solarized-light .fg-0 { + color: #073642; +} +.asciinema-theme-solarized-light .bg-0 { + background-color: #073642; +} +.asciinema-theme-solarized-light .fg-1 { + color: #dc322f; +} +.asciinema-theme-solarized-light .bg-1 { + background-color: #dc322f; +} +.asciinema-theme-solarized-light .fg-2 { + color: #859900; +} +.asciinema-theme-solarized-light .bg-2 { + background-color: #859900; +} 
+.asciinema-theme-solarized-light .fg-3 { + color: #b58900; +} +.asciinema-theme-solarized-light .bg-3 { + background-color: #b58900; +} +.asciinema-theme-solarized-light .fg-4 { + color: #268bd2; +} +.asciinema-theme-solarized-light .bg-4 { + background-color: #268bd2; +} +.asciinema-theme-solarized-light .fg-5 { + color: #d33682; +} +.asciinema-theme-solarized-light .bg-5 { + background-color: #d33682; +} +.asciinema-theme-solarized-light .fg-6 { + color: #2aa198; +} +.asciinema-theme-solarized-light .bg-6 { + background-color: #2aa198; +} +.asciinema-theme-solarized-light .fg-7 { + color: #eee8d5; +} +.asciinema-theme-solarized-light .bg-7 { + background-color: #eee8d5; +} +.asciinema-theme-solarized-light .fg-8 { + color: #002b36; +} +.asciinema-theme-solarized-light .bg-8 { + background-color: #002b36; +} +.asciinema-theme-solarized-light .fg-9 { + color: #cb4b16; +} +.asciinema-theme-solarized-light .bg-9 { + background-color: #cb4b16; +} +.asciinema-theme-solarized-light .fg-10 { + color: #586e75; +} +.asciinema-theme-solarized-light .bg-10 { + background-color: #586e75; +} +.asciinema-theme-solarized-light .fg-11 { + color: #657c83; +} +.asciinema-theme-solarized-light .bg-11 { + background-color: #657c83; +} +.asciinema-theme-solarized-light .fg-12 { + color: #839496; +} +.asciinema-theme-solarized-light .bg-12 { + background-color: #839496; +} +.asciinema-theme-solarized-light .fg-13 { + color: #6c71c4; +} +.asciinema-theme-solarized-light .bg-13 { + background-color: #6c71c4; +} +.asciinema-theme-solarized-light .fg-14 { + color: #93a1a1; +} +.asciinema-theme-solarized-light .bg-14 { + background-color: #93a1a1; +} +.asciinema-theme-solarized-light .fg-15 { + color: #fdf6e3; +} +.asciinema-theme-solarized-light .bg-15 { + background-color: #fdf6e3; +} +.asciinema-theme-seti .asciinema-terminal { + color: #cacecd; + background-color: #111213; + border-color: #111213; +} +.asciinema-theme-seti .fg-bg { + color: #111213; +} +.asciinema-theme-seti .bg-fg { + 
background-color: #cacecd; +} +.asciinema-theme-seti .fg-0 { + color: #323232; +} +.asciinema-theme-seti .bg-0 { + background-color: #323232; +} +.asciinema-theme-seti .fg-1 { + color: #c22832; +} +.asciinema-theme-seti .bg-1 { + background-color: #c22832; +} +.asciinema-theme-seti .fg-2 { + color: #8ec43d; +} +.asciinema-theme-seti .bg-2 { + background-color: #8ec43d; +} +.asciinema-theme-seti .fg-3 { + color: #e0c64f; +} +.asciinema-theme-seti .bg-3 { + background-color: #e0c64f; +} +.asciinema-theme-seti .fg-4 { + color: #43a5d5; +} +.asciinema-theme-seti .bg-4 { + background-color: #43a5d5; +} +.asciinema-theme-seti .fg-5 { + color: #8b57b5; +} +.asciinema-theme-seti .bg-5 { + background-color: #8b57b5; +} +.asciinema-theme-seti .fg-6 { + color: #8ec43d; +} +.asciinema-theme-seti .bg-6 { + background-color: #8ec43d; +} +.asciinema-theme-seti .fg-7 { + color: #eeeeee; +} +.asciinema-theme-seti .bg-7 { + background-color: #eeeeee; +} +.asciinema-theme-seti .fg-8 { + color: #323232; +} +.asciinema-theme-seti .bg-8 { + background-color: #323232; +} +.asciinema-theme-seti .fg-9 { + color: #c22832; +} +.asciinema-theme-seti .bg-9 { + background-color: #c22832; +} +.asciinema-theme-seti .fg-10 { + color: #8ec43d; +} +.asciinema-theme-seti .bg-10 { + background-color: #8ec43d; +} +.asciinema-theme-seti .fg-11 { + color: #e0c64f; +} +.asciinema-theme-seti .bg-11 { + background-color: #e0c64f; +} +.asciinema-theme-seti .fg-12 { + color: #43a5d5; +} +.asciinema-theme-seti .bg-12 { + background-color: #43a5d5; +} +.asciinema-theme-seti .fg-13 { + color: #8b57b5; +} +.asciinema-theme-seti .bg-13 { + background-color: #8b57b5; +} +.asciinema-theme-seti .fg-14 { + color: #8ec43d; +} +.asciinema-theme-seti .bg-14 { + background-color: #8ec43d; +} +.asciinema-theme-seti .fg-15 { + color: #ffffff; +} +.asciinema-theme-seti .bg-15 { + background-color: #ffffff; +} +.asciinema-theme-seti .fg-8, +.asciinema-theme-seti .fg-9, +.asciinema-theme-seti .fg-10, +.asciinema-theme-seti 
.fg-11, +.asciinema-theme-seti .fg-12, +.asciinema-theme-seti .fg-13, +.asciinema-theme-seti .fg-14, +.asciinema-theme-seti .fg-15 { + font-weight: bold; +} +/* Based on Monokai from base16 collection - https://github.com/chriskempson/base16 */ +.asciinema-theme-monokai .asciinema-terminal { + color: #f8f8f2; + background-color: #272822; + border-color: #272822; +} +.asciinema-theme-monokai .fg-bg { + color: #272822; +} +.asciinema-theme-monokai .bg-fg { + background-color: #f8f8f2; +} +.asciinema-theme-monokai .fg-0 { + color: #272822; +} +.asciinema-theme-monokai .bg-0 { + background-color: #272822; +} +.asciinema-theme-monokai .fg-1 { + color: #f92672; +} +.asciinema-theme-monokai .bg-1 { + background-color: #f92672; +} +.asciinema-theme-monokai .fg-2 { + color: #a6e22e; +} +.asciinema-theme-monokai .bg-2 { + background-color: #a6e22e; +} +.asciinema-theme-monokai .fg-3 { + color: #f4bf75; +} +.asciinema-theme-monokai .bg-3 { + background-color: #f4bf75; +} +.asciinema-theme-monokai .fg-4 { + color: #66d9ef; +} +.asciinema-theme-monokai .bg-4 { + background-color: #66d9ef; +} +.asciinema-theme-monokai .fg-5 { + color: #ae81ff; +} +.asciinema-theme-monokai .bg-5 { + background-color: #ae81ff; +} +.asciinema-theme-monokai .fg-6 { + color: #a1efe4; +} +.asciinema-theme-monokai .bg-6 { + background-color: #a1efe4; +} +.asciinema-theme-monokai .fg-7 { + color: #f8f8f2; +} +.asciinema-theme-monokai .bg-7 { + background-color: #f8f8f2; +} +.asciinema-theme-monokai .fg-8 { + color: #75715e; +} +.asciinema-theme-monokai .bg-8 { + background-color: #75715e; +} +.asciinema-theme-monokai .fg-9 { + color: #f92672; +} +.asciinema-theme-monokai .bg-9 { + background-color: #f92672; +} +.asciinema-theme-monokai .fg-10 { + color: #a6e22e; +} +.asciinema-theme-monokai .bg-10 { + background-color: #a6e22e; +} +.asciinema-theme-monokai .fg-11 { + color: #f4bf75; +} +.asciinema-theme-monokai .bg-11 { + background-color: #f4bf75; +} +.asciinema-theme-monokai .fg-12 { + color: #66d9ef; 
+} +.asciinema-theme-monokai .bg-12 { + background-color: #66d9ef; +} +.asciinema-theme-monokai .fg-13 { + color: #ae81ff; +} +.asciinema-theme-monokai .bg-13 { + background-color: #ae81ff; +} +.asciinema-theme-monokai .fg-14 { + color: #a1efe4; +} +.asciinema-theme-monokai .bg-14 { + background-color: #a1efe4; +} +.asciinema-theme-monokai .fg-15 { + color: #f9f8f5; +} +.asciinema-theme-monokai .bg-15 { + background-color: #f9f8f5; +} +.asciinema-theme-monokai .fg-8, +.asciinema-theme-monokai .fg-9, +.asciinema-theme-monokai .fg-10, +.asciinema-theme-monokai .fg-11, +.asciinema-theme-monokai .fg-12, +.asciinema-theme-monokai .fg-13, +.asciinema-theme-monokai .fg-14, +.asciinema-theme-monokai .fg-15 { + font-weight: bold; +} diff --git a/Documentation/sphinx-static/asciinema-player.js b/Documentation/sphinx-static/asciinema-player.js new file mode 100644 index 00000000000000..5ad47e08b98f87 --- /dev/null +++ b/Documentation/sphinx-static/asciinema-player.js @@ -0,0 +1,1213 @@ +/** + * asciinema-player v2.6.1 + * + * Copyright 2011-2018, Marcin Kulik + * + */ + +// CustomEvent polyfill from MDN (https://developer.mozilla.org/en-US/docs/Web/API/CustomEvent/CustomEvent) + +(function () { + if (typeof window.CustomEvent === "function") return false; + + function CustomEvent ( event, params ) { + params = params || { bubbles: false, cancelable: false, detail: undefined }; + var evt = document.createEvent( 'CustomEvent'); + evt.initCustomEvent(event, params.bubbles, params.cancelable, params.detail); + return evt; + } + + CustomEvent.prototype = window.Event.prototype; + + window.CustomEvent = CustomEvent; +})(); + +/** + * @license + * Copyright (c) 2014 The Polymer Project Authors. All rights reserved. 
+ * This code may only be used under the BSD style license found at http://polymer.github.io/LICENSE.txt + * The complete set of authors may be found at http://polymer.github.io/AUTHORS.txt + * The complete set of contributors may be found at http://polymer.github.io/CONTRIBUTORS.txt + * Code distributed by Google as part of the polymer project is also + * subject to an additional IP rights grant found at http://polymer.github.io/PATENTS.txt + */ +// @version 0.7.22 +"undefined"==typeof WeakMap&&!function(){var e=Object.defineProperty,t=Date.now()%1e9,n=function(){this.name="__st"+(1e9*Math.random()>>>0)+(t++ +"__")};n.prototype={set:function(t,n){var o=t[this.name];return o&&o[0]===t?o[1]=n:e(t,this.name,{value:[t,n],writable:!0}),this},get:function(e){var t;return(t=e[this.name])&&t[0]===e?t[1]:void 0},"delete":function(e){var t=e[this.name];return t&&t[0]===e?(t[0]=t[1]=void 0,!0):!1},has:function(e){var t=e[this.name];return t?t[0]===e:!1}},window.WeakMap=n}(),function(e){function t(e){E.push(e),b||(b=!0,w(o))}function n(e){return window.ShadowDOMPolyfill&&window.ShadowDOMPolyfill.wrapIfNeeded(e)||e}function o(){b=!1;var e=E;E=[],e.sort(function(e,t){return e.uid_-t.uid_});var t=!1;e.forEach(function(e){var n=e.takeRecords();r(e),n.length&&(e.callback_(n,e),t=!0)}),t&&o()}function r(e){e.nodes_.forEach(function(t){var n=v.get(t);n&&n.forEach(function(t){t.observer===e&&t.removeTransientObservers()})})}function i(e,t){for(var n=e;n;n=n.parentNode){var o=v.get(n);if(o)for(var r=0;r0){var r=n[o-1],i=p(r,e);if(i)return void(n[o-1]=i)}else t(this.observer);n[o]=e},addListeners:function(){this.addListeners_(this.target)},addListeners_:function(e){var 
t=this.options;t.attributes&&e.addEventListener("DOMAttrModified",this,!0),t.characterData&&e.addEventListener("DOMCharacterDataModified",this,!0),t.childList&&e.addEventListener("DOMNodeInserted",this,!0),(t.childList||t.subtree)&&e.addEventListener("DOMNodeRemoved",this,!0)},removeListeners:function(){this.removeListeners_(this.target)},removeListeners_:function(e){var t=this.options;t.attributes&&e.removeEventListener("DOMAttrModified",this,!0),t.characterData&&e.removeEventListener("DOMCharacterDataModified",this,!0),t.childList&&e.removeEventListener("DOMNodeInserted",this,!0),(t.childList||t.subtree)&&e.removeEventListener("DOMNodeRemoved",this,!0)},addTransientObserver:function(e){if(e!==this.target){this.addListeners_(e),this.transientObservedNodes.push(e);var t=v.get(e);t||v.set(e,t=[]),t.push(this)}},removeTransientObservers:function(){var e=this.transientObservedNodes;this.transientObservedNodes=[],e.forEach(function(e){this.removeListeners_(e);for(var t=v.get(e),n=0;n=0)){n.push(e);for(var o,r=e.querySelectorAll("link[rel="+a+"]"),d=0,s=r.length;s>d&&(o=r[d]);d++)o["import"]&&i(o["import"],t,n);t(e)}}var a=window.HTMLImports?window.HTMLImports.IMPORT_LINK_TYPE:"none";e.forDocumentTree=r,e.forSubtree=t}),window.CustomElements.addModule(function(e){function t(e,t){return n(e,t)||o(e,t)}function n(t,n){return e.upgrade(t,n)?!0:void(n&&a(t))}function o(e,t){b(e,function(e){return n(e,t)?!0:void 0})}function r(e){N.push(e),y||(y=!0,setTimeout(i))}function i(){y=!1;for(var e,t=N,n=0,o=t.length;o>n&&(e=t[n]);n++)e();N=[]}function a(e){_?r(function(){d(e)}):d(e)}function d(e){e.__upgraded__&&!e.__attached&&(e.__attached=!0,e.attachedCallback&&e.attachedCallback())}function s(e){u(e),b(e,function(e){u(e)})}function u(e){_?r(function(){c(e)}):c(e)}function c(e){e.__upgraded__&&e.__attached&&(e.__attached=!1,e.detachedCallback&&e.detachedCallback())}function l(e){for(var 
t=e,n=window.wrap(document);t;){if(t==n)return!0;t=t.parentNode||t.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&t.host}}function f(e){if(e.shadowRoot&&!e.shadowRoot.__watched){g.dom&&console.log("watching shadow-root for: ",e.localName);for(var t=e.shadowRoot;t;)w(t),t=t.olderShadowRoot}}function p(e,n){if(g.dom){var o=n[0];if(o&&"childList"===o.type&&o.addedNodes&&o.addedNodes){for(var r=o.addedNodes[0];r&&r!==document&&!r.host;)r=r.parentNode;var i=r&&(r.URL||r._URL||r.host&&r.host.localName)||"";i=i.split("/?").shift().split("/").pop()}console.group("mutations (%d) [%s]",n.length,i||"")}var a=l(e);n.forEach(function(e){"childList"===e.type&&(M(e.addedNodes,function(e){e.localName&&t(e,a)}),M(e.removedNodes,function(e){e.localName&&s(e)}))}),g.dom&&console.groupEnd()}function m(e){for(e=window.wrap(e),e||(e=window.wrap(document));e.parentNode;)e=e.parentNode;var t=e.__observer;t&&(p(e,t.takeRecords()),i())}function w(e){if(!e.__observer){var t=new MutationObserver(p.bind(this,e));t.observe(e,{childList:!0,subtree:!0}),e.__observer=t}}function v(e){e=window.wrap(e),g.dom&&console.group("upgradeDocument: ",e.baseURI.split("/").pop());var n=e===window.wrap(document);t(e,n),w(e),g.dom&&console.groupEnd()}function h(e){E(e,v)}var g=e.flags,b=e.forSubtree,E=e.forDocumentTree,_=window.MutationObserver._isPolyfilled&&g["throttle-attached"];e.hasPolyfillMutations=_,e.hasThrottledAttached=_;var y=!1,N=[],M=Array.prototype.forEach.call.bind(Array.prototype.forEach),O=Element.prototype.createShadowRoot;O&&(Element.prototype.createShadowRoot=function(){var e=O.call(this);return window.CustomElements.watchShadow(this),e}),e.watchShadow=f,e.upgradeDocumentTree=h,e.upgradeDocument=v,e.upgradeSubtree=o,e.upgradeAll=t,e.attached=a,e.takeRecords=m}),window.CustomElements.addModule(function(e){function t(t,o){if("template"===t.localName&&window.HTMLTemplateElement&&HTMLTemplateElement.decorate&&HTMLTemplateElement.decorate(t),!t.__upgraded__&&t.nodeType===Node.ELEMENT_NODE){var 
r=t.getAttribute("is"),i=e.getRegisteredDefinition(t.localName)||e.getRegisteredDefinition(r);if(i&&(r&&i.tag==t.localName||!r&&!i["extends"]))return n(t,i,o)}}function n(t,n,r){return a.upgrade&&console.group("upgrade:",t.localName),n.is&&t.setAttribute("is",n.is),o(t,n),t.__upgraded__=!0,i(t),r&&e.attached(t),e.upgradeSubtree(t,r),a.upgrade&&console.groupEnd(),t}function o(e,t){Object.__proto__?e.__proto__=t.prototype:(r(e,t.prototype,t["native"]),e.__proto__=t.prototype)}function r(e,t,n){for(var o={},r=t;r!==n&&r!==HTMLElement.prototype;){for(var i,a=Object.getOwnPropertyNames(r),d=0;i=a[d];d++)o[i]||(Object.defineProperty(e,i,Object.getOwnPropertyDescriptor(r,i)),o[i]=1);r=Object.getPrototypeOf(r)}}function i(e){e.createdCallback&&e.createdCallback()}var a=e.flags;e.upgrade=t,e.upgradeWithDefinition=n,e.implementPrototype=o}),window.CustomElements.addModule(function(e){function t(t,o){var s=o||{};if(!t)throw new Error("document.registerElement: first argument `name` must not be empty");if(t.indexOf("-")<0)throw new Error("document.registerElement: first argument ('name') must contain a dash ('-'). Argument provided was '"+String(t)+"'.");if(r(t))throw new Error("Failed to execute 'registerElement' on 'Document': Registration failed for type '"+String(t)+"'. 
The type name is invalid.");if(u(t))throw new Error("DuplicateDefinitionError: a type with name '"+String(t)+"' is already registered");return s.prototype||(s.prototype=Object.create(HTMLElement.prototype)),s.__name=t.toLowerCase(),s["extends"]&&(s["extends"]=s["extends"].toLowerCase()),s.lifecycle=s.lifecycle||{},s.ancestry=i(s["extends"]),a(s),d(s),n(s.prototype),c(s.__name,s),s.ctor=l(s),s.ctor.prototype=s.prototype,s.prototype.constructor=s.ctor,e.ready&&v(document),s.ctor}function n(e){if(!e.setAttribute._polyfilled){var t=e.setAttribute;e.setAttribute=function(e,n){o.call(this,e,n,t)};var n=e.removeAttribute;e.removeAttribute=function(e){o.call(this,e,null,n)},e.setAttribute._polyfilled=!0}}function o(e,t,n){e=e.toLowerCase();var o=this.getAttribute(e);n.apply(this,arguments);var r=this.getAttribute(e);this.attributeChangedCallback&&r!==o&&this.attributeChangedCallback(e,o,r)}function r(e){for(var t=0;t<_.length;t++)if(e===_[t])return!0}function i(e){var t=u(e);return t?i(t["extends"]).concat([t]):[]}function a(e){for(var t,n=e["extends"],o=0;t=e.ancestry[o];o++)n=t.is&&t.tag;e.tag=n||e.__name,n&&(e.is=e.__name)}function d(e){if(!Object.__proto__){var t=HTMLElement.prototype;if(e.is){var n=document.createElement(e.tag);t=Object.getPrototypeOf(n)}for(var o,r=e.prototype,i=!1;r;)r==t&&(i=!0),o=Object.getPrototypeOf(r),o&&(r.__proto__=o),r=o;i||console.warn(e.tag+" prototype not found in prototype chain for "+e.is),e["native"]=t}}function s(e){return g(M(e.tag),e)}function u(e){return e?y[e.toLowerCase()]:void 0}function c(e,t){y[e]=t}function l(e){return function(){return s(e)}}function f(e,t,n){return e===N?p(t,n):O(e,t)}function p(e,t){e&&(e=e.toLowerCase()),t&&(t=t.toLowerCase());var n=u(t||e);if(n){if(e==n.tag&&t==n.is)return new n.ctor;if(!t&&!n.is)return new n.ctor}var o;return t?(o=p(e),o.setAttribute("is",t),o):(o=M(e),e.indexOf("-")>=0&&b(o,HTMLElement),o)}function m(e,t){var n=e[t];e[t]=function(){var e=n.apply(this,arguments);return h(e),e}}var 
w,v=(e.isIE,e.upgradeDocumentTree),h=e.upgradeAll,g=e.upgradeWithDefinition,b=e.implementPrototype,E=e.useNative,_=["annotation-xml","color-profile","font-face","font-face-src","font-face-uri","font-face-format","font-face-name","missing-glyph"],y={},N="http://www.w3.org/1999/xhtml",M=document.createElement.bind(document),O=document.createElementNS.bind(document);w=Object.__proto__||E?function(e,t){return e instanceof t}:function(e,t){if(e instanceof t)return!0;for(var n=e;n;){if(n===t.prototype)return!0;n=n.__proto__}return!1},m(Node.prototype,"cloneNode"),m(document,"importNode"),document.registerElement=t,document.createElement=p,document.createElementNS=f,e.registry=y,e["instanceof"]=w,e.reservedTagList=_,e.getRegisteredDefinition=u,document.register=document.registerElement}),function(e){function t(){i(window.wrap(document)),window.CustomElements.ready=!0;var e=window.requestAnimationFrame||function(e){setTimeout(e,16)};e(function(){setTimeout(function(){window.CustomElements.readyTime=Date.now(),window.HTMLImports&&(window.CustomElements.elapsed=window.CustomElements.readyTime-window.HTMLImports.readyTime),document.dispatchEvent(new CustomEvent("WebComponentsReady",{bubbles:!0}))})})}var n=e.useNative,o=e.initializeModules;e.isIE;if(n){var r=function(){};e.watchShadow=r,e.upgrade=r,e.upgradeAll=r,e.upgradeDocumentTree=r,e.upgradeSubtree=r,e.takeRecords=r,e["instanceof"]=function(e,t){return e instanceof t}}else o();var i=e.upgradeDocumentTree,a=e.upgradeDocument;if(window.wrap||(window.ShadowDOMPolyfill?(window.wrap=window.ShadowDOMPolyfill.wrapIfNeeded,window.unwrap=window.ShadowDOMPolyfill.unwrapIfNeeded):window.wrap=window.unwrap=function(e){return e}),window.HTMLImports&&(window.HTMLImports.__importsParsingHook=function(e){e["import"]&&a(wrap(e["import"]))}),"complete"===document.readyState||e.flags.eager)t();else if("interactive"!==document.readyState||window.attachEvent||window.HTMLImports&&!window.HTMLImports.ready){var 
d=window.HTMLImports&&!window.HTMLImports.ready?"HTMLImportsLoaded":"DOMContentLoaded";window.addEventListener(d,t)}else t()}(window.CustomElements); +if(typeof Math.imul == "undefined" || (Math.imul(0xffffffff,5) == 0)) { + Math.imul = function (a, b) { + var ah = (a >>> 16) & 0xffff; + var al = a & 0xffff; + var bh = (b >>> 16) & 0xffff; + var bl = b & 0xffff; + // the shift by 0 fixes the sign on the high part + // the final |0 converts the unsigned value into a signed value + return ((al * bl) + (((ah * bl + al * bh) << 16) >>> 0)|0); + } +} + +/** + * React v15.5.4 + * + * Copyright 2013-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + * + */ +!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{var e;e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,e.React=t()}}(function(){return function t(e,n,r){function o(u,a){if(!n[u]){if(!e[u]){var s="function"==typeof require&&require;if(!a&&s)return s(u,!0);if(i)return i(u,!0);var c=new Error("Cannot find module '"+u+"'");throw c.code="MODULE_NOT_FOUND",c}var l=n[u]={exports:{}};e[u][0].call(l.exports,function(t){var n=e[u][1][t];return o(n||t)},l,l.exports,t,e,n,r)}return n[u].exports}for(var i="function"==typeof require&&require,u=0;u1){for(var y=Array(d),h=0;h1){for(var 
m=Array(v),b=0;b8&&C<=11),x=32,w=String.fromCharCode(x),T={beforeInput:{phasedRegistrationNames:{bubbled:"onBeforeInput",captured:"onBeforeInputCapture"},dependencies:["topCompositionEnd","topKeyPress","topTextInput","topPaste"]},compositionEnd:{phasedRegistrationNames:{bubbled:"onCompositionEnd",captured:"onCompositionEndCapture"},dependencies:["topBlur","topCompositionEnd","topKeyDown","topKeyPress","topKeyUp","topMouseDown"]},compositionStart:{phasedRegistrationNames:{bubbled:"onCompositionStart",captured:"onCompositionStartCapture"},dependencies:["topBlur","topCompositionStart","topKeyDown","topKeyPress","topKeyUp","topMouseDown"]},compositionUpdate:{phasedRegistrationNames:{bubbled:"onCompositionUpdate",captured:"onCompositionUpdateCapture"},dependencies:["topBlur","topCompositionUpdate","topKeyDown","topKeyPress","topKeyUp","topMouseDown"]}},k=!1,P=null,S={eventTypes:T,extractEvents:function(e,t,n,r){return[u(e,t,n,r),p(e,t,n,r)]}};t.exports=S},{123:123,19:19,20:20,78:78,82:82}],4:[function(e,t,n){"use strict";function r(e,t){return e+t.charAt(0).toUpperCase()+t.substring(1)}var o={animationIterationCount:!0,borderImageOutset:!0,borderImageSlice:!0,borderImageWidth:!0,boxFlex:!0,boxFlexGroup:!0,boxOrdinalGroup:!0,columnCount:!0,flex:!0,flexGrow:!0,flexPositive:!0,flexShrink:!0,flexNegative:!0,flexOrder:!0,gridRow:!0,gridColumn:!0,fontWeight:!0,lineClamp:!0,lineHeight:!0,opacity:!0,order:!0,orphans:!0,tabSize:!0,widows:!0,zIndex:!0,zoom:!0,fillOpacity:!0,floodOpacity:!0,stopOpacity:!0,strokeDasharray:!0,strokeDashoffset:!0,strokeMiterlimit:!0,strokeOpacity:!0,strokeWidth:!0},i=["Webkit","ms","Moz","O"];Object.keys(o).forEach(function(e){i.forEach(function(t){o[r(t,e)]=o[e]})});var 
a={background:{backgroundAttachment:!0,backgroundColor:!0,backgroundImage:!0,backgroundPositionX:!0,backgroundPositionY:!0,backgroundRepeat:!0},backgroundPosition:{backgroundPositionX:!0,backgroundPositionY:!0},border:{borderWidth:!0,borderStyle:!0,borderColor:!0},borderBottom:{borderBottomWidth:!0,borderBottomStyle:!0,borderBottomColor:!0},borderLeft:{borderLeftWidth:!0,borderLeftStyle:!0,borderLeftColor:!0},borderRight:{borderRightWidth:!0,borderRightStyle:!0,borderRightColor:!0},borderTop:{borderTopWidth:!0,borderTopStyle:!0,borderTopColor:!0},font:{fontStyle:!0,fontVariant:!0,fontWeight:!0,fontSize:!0,lineHeight:!0,fontFamily:!0},outline:{outlineWidth:!0,outlineStyle:!0,outlineColor:!0}},s={isUnitlessNumber:o,shorthandPropertyExpansions:a};t.exports=s},{}],5:[function(e,t,n){"use strict";var r=e(4),o=e(123),i=(e(58),e(125),e(94)),a=e(136),s=e(140),u=(e(142),s(function(e){return a(e)})),l=!1,c="cssFloat";if(o.canUseDOM){var p=document.createElement("div").style;try{p.font=""}catch(e){l=!0}void 0===document.documentElement.style.cssFloat&&(c="styleFloat")}var d={createMarkupForStyles:function(e,t){var n="";for(var r in e)if(e.hasOwnProperty(r)){var o=e[r];null!=o&&(n+=u(r)+":",n+=i(r,o,t)+";")}return n||null},setValueForStyles:function(e,t,n){var o=e.style;for(var a in t)if(t.hasOwnProperty(a)){var s=i(a,t[a],n);if("float"!==a&&"cssFloat"!==a||(a=c),s)o[a]=s;else{var u=l&&r.shorthandPropertyExpansions[a];if(u)for(var p in u)o[p]="";else o[a]=""}}}};t.exports=d},{123:123,125:125,136:136,140:140,142:142,4:4,58:58,94:94}],6:[function(e,t,n){"use strict";function r(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}var o=e(112),i=e(24),a=(e(137),function(){function e(t){r(this,e),this._callbacks=null,this._contexts=null,this._arg=t}return e.prototype.enqueue=function(e,t){this._callbacks=this._callbacks||[],this._callbacks.push(e),this._contexts=this._contexts||[],this._contexts.push(t)},e.prototype.notifyAll=function(){var 
e=this._callbacks,t=this._contexts,n=this._arg;if(e&&t){e.length!==t.length&&o("24"),this._callbacks=null,this._contexts=null;for(var r=0;r8));var A=!1;b.canUseDOM&&(A=k("input")&&(!document.documentMode||document.documentMode>11));var D={get:function(){return O.get.call(this)},set:function(e){I=""+e,O.set.call(this,e)}},L={eventTypes:S,extractEvents:function(e,t,n,o){var i,a,s=t?E.getNodeFromInstance(t):window;if(r(s)?R?i=u:a=l:P(s)?A?i=f:(i=m,a=h):v(s)&&(i=g),i){var c=i(e,t);if(c){var p=w.getPooled(S.change,c,n,o);return p.type="change",C.accumulateTwoPhaseDispatches(p),p}}a&&a(e,s,t),"topBlur"===e&&y(t,s)}};t.exports=L},{102:102,109:109,110:110,123:123,16:16,19:19,33:33,71:71,80:80}],8:[function(e,t,n){"use strict";function r(e,t){return Array.isArray(t)&&(t=t[1]),t?t.nextSibling:e.firstChild}function o(e,t,n){c.insertTreeBefore(e,t,n)}function i(e,t,n){Array.isArray(t)?s(e,t[0],t[1],n):m(e,t,n)}function a(e,t){if(Array.isArray(t)){var n=t[1];t=t[0],u(e,t,n),e.removeChild(n)}e.removeChild(t)}function s(e,t,n,r){for(var o=t;;){var i=o.nextSibling;if(m(e,o,r),o===n)break;o=i}}function u(e,t,n){for(;;){var r=t.nextSibling;if(r===n)break;e.removeChild(r)}}function l(e,t,n){var r=e.parentNode,o=e.nextSibling;o===t?n&&m(r,document.createTextNode(n),o):n?(h(o,n),u(r,o,t)):u(r,e,t)}var c=e(9),p=e(13),d=(e(33),e(58),e(93)),f=e(114),h=e(115),m=d(function(e,t,n){e.insertBefore(t,n)}),v=p.dangerouslyReplaceNodeWithMarkup,g={dangerouslyReplaceNodeWithMarkup:v,replaceDelimitedText:l,processUpdates:function(e,t){for(var n=0;n-1||a("96",e),!l.plugins[n]){t.extractEvents||a("97",e),l.plugins[n]=t;var r=t.eventTypes;for(var i in r)o(r[i],t,i)||a("98",i,e)}}}function o(e,t,n){l.eventNameDispatchConfigs.hasOwnProperty(n)&&a("99",n),l.eventNameDispatchConfigs[n]=e;var r=e.phasedRegistrationNames;if(r){for(var o in r)if(r.hasOwnProperty(o)){var s=r[o];i(s,t,n)}return!0}return!!e.registrationName&&(i(e.registrationName,t,n),!0)}function 
i(e,t,n){l.registrationNameModules[e]&&a("100",e),l.registrationNameModules[e]=t,l.registrationNameDependencies[e]=t.eventTypes[n].dependencies}var a=e(112),s=(e(137),null),u={},l={plugins:[],eventNameDispatchConfigs:{},registrationNameModules:{},registrationNameDependencies:{},possibleRegistrationNames:null,injectEventPluginOrder:function(e){s&&a("101"),s=Array.prototype.slice.call(e),r()},injectEventPluginsByName:function(e){var t=!1;for(var n in e)if(e.hasOwnProperty(n)){var o=e[n];u.hasOwnProperty(n)&&u[n]===o||(u[n]&&a("102",n),u[n]=o,t=!0)}t&&r()},getPluginModuleForEvent:function(e){var t=e.dispatchConfig;if(t.registrationName)return l.registrationNameModules[t.registrationName]||null;if(void 0!==t.phasedRegistrationNames){var n=t.phasedRegistrationNames;for(var r in n)if(n.hasOwnProperty(r)){var o=l.registrationNameModules[n[r]];if(o)return o}}return null},_resetEventPlugins:function(){s=null;for(var e in u)u.hasOwnProperty(e)&&delete u[e];l.plugins.length=0;var t=l.eventNameDispatchConfigs;for(var n in t)t.hasOwnProperty(n)&&delete t[n];var r=l.registrationNameModules;for(var o in r)r.hasOwnProperty(o)&&delete r[o]}};t.exports=l},{112:112,137:137}],18:[function(e,t,n){"use strict";function r(e){return"topMouseUp"===e||"topTouchEnd"===e||"topTouchCancel"===e}function o(e){return"topMouseMove"===e||"topTouchMove"===e}function i(e){return"topMouseDown"===e||"topTouchStart"===e}function a(e,t,n,r){var o=e.type||"unknown-event";e.currentTarget=g.getNodeFromInstance(r),t?m.invokeGuardedCallbackWithCatch(o,n,e):m.invokeGuardedCallback(o,n,e),e.currentTarget=null}function s(e,t){var n=e._dispatchListeners,r=e._dispatchInstances;if(Array.isArray(n))for(var o=0;o1?1-t:void 0;return this._fallbackText=o.slice(e,s),this._fallbackText}}),i.addPoolingTo(r),t.exports=r},{106:106,143:143,24:24}],21:[function(e,t,n){"use strict";var 
r=e(11),o=r.injection.MUST_USE_PROPERTY,i=r.injection.HAS_BOOLEAN_VALUE,a=r.injection.HAS_NUMERIC_VALUE,s=r.injection.HAS_POSITIVE_NUMERIC_VALUE,u=r.injection.HAS_OVERLOADED_BOOLEAN_VALUE,l={isCustomAttribute:RegExp.prototype.test.bind(new RegExp("^(data|aria)-["+r.ATTRIBUTE_NAME_CHAR+"]*$")),Properties:{accept:0,acceptCharset:0,accessKey:0,action:0,allowFullScreen:i,allowTransparency:0,alt:0,as:0,async:i,autoComplete:0,autoPlay:i,capture:i,cellPadding:0,cellSpacing:0,charSet:0,challenge:0,checked:o|i,cite:0,classID:0,className:0,cols:s,colSpan:0,content:0,contentEditable:0,contextMenu:0,controls:i,coords:0,crossOrigin:0,data:0,dateTime:0,default:i,defer:i,dir:0,disabled:i,download:u,draggable:0,encType:0,form:0,formAction:0,formEncType:0,formMethod:0,formNoValidate:i,formTarget:0,frameBorder:0,headers:0,height:0,hidden:i,high:0,href:0,hrefLang:0,htmlFor:0,httpEquiv:0,icon:0,id:0,inputMode:0,integrity:0,is:0,keyParams:0,keyType:0,kind:0,label:0,lang:0,list:0,loop:i,low:0,manifest:0,marginHeight:0,marginWidth:0,max:0,maxLength:0,media:0,mediaGroup:0,method:0,min:0,minLength:0,multiple:o|i,muted:o|i,name:0,nonce:0,noValidate:i,open:i,optimum:0,pattern:0,placeholder:0,playsInline:i,poster:0,preload:0,profile:0,radioGroup:0,readOnly:i,referrerPolicy:0,rel:0,required:i,reversed:i,role:0,rows:s,rowSpan:a,sandbox:0,scope:0,scoped:i,scrolling:0,seamless:i,selected:o|i,shape:0,size:s,sizes:0,span:s,spellCheck:0,src:0,srcDoc:0,srcLang:0,srcSet:0,start:a,step:0,style:0,summary:0,tabIndex:0,target:0,title:0,type:0,useMap:0,value:0,width:0,wmode:0,wrap:0,about:0,datatype:0,inlist:0,prefix:0,property:0,resource:0,typeof:0,vocab:0,autoCapitalize:0,autoCorrect:0,autoSave:0,color:0,itemProp:0,itemScope:i,itemType:0,itemID:0,itemRef:0,results:0,security:0,unselectable:0},DOMAttributeNames:{acceptCharset:"accept-charset",className:"class",htmlFor:"for",httpEquiv:"http-equiv"},DOMPropertyNames:{},DOMMutationMethods:{value:function(e,t){if(null==t)return 
e.removeAttribute("value");"number"!==e.type||!1===e.hasAttribute("value")?e.setAttribute("value",""+t):e.validity&&!e.validity.badInput&&e.ownerDocument.activeElement!==e&&e.setAttribute("value",""+t)}}};t.exports=l},{11:11}],22:[function(e,t,n){"use strict";function r(e){var t={"=":"=0",":":"=2"};return"$"+(""+e).replace(/[=:]/g,function(e){return t[e]})}function o(e){var t={"=0":"=","=2":":"};return(""+("."===e[0]&&"$"===e[1]?e.substring(2):e.substring(1))).replace(/(=0|=2)/g,function(e){return t[e]})}var i={escape:r,unescape:o};t.exports=i},{}],23:[function(e,t,n){"use strict";function r(e){null!=e.checkedLink&&null!=e.valueLink&&s("87")}function o(e){r(e),(null!=e.value||null!=e.onChange)&&s("88")}function i(e){r(e),(null!=e.checked||null!=e.onChange)&&s("89")}function a(e){if(e){var t=e.getName();if(t)return" Check the render method of `"+t+"`."}return""}var s=e(112),u=e(64),l=e(145),c=e(120),p=l(c.isValidElement),d=(e(137),e(142),{button:!0,checkbox:!0,image:!0,hidden:!0,radio:!0,reset:!0,submit:!0}),f={value:function(e,t,n){return!e[t]||d[e.type]||e.onChange||e.readOnly||e.disabled?null:new Error("You provided a `value` prop to a form field without an `onChange` handler. This will render a read-only field. If the field should be mutable use `defaultValue`. Otherwise, set either `onChange` or `readOnly`.")},checked:function(e,t,n){return!e[t]||e.onChange||e.readOnly||e.disabled?null:new Error("You provided a `checked` prop to a form field without an `onChange` handler. This will render a read-only field. If the field should be mutable use `defaultChecked`. 
Otherwise, set either `onChange` or `readOnly`.")},onChange:p.func},h={},m={checkPropTypes:function(e,t,n){for(var r in f){if(f.hasOwnProperty(r))var o=f[r](t,r,e,"prop",null,u);o instanceof Error&&!(o.message in h)&&(h[o.message]=!0,a(n))}},getValue:function(e){return e.valueLink?(o(e),e.valueLink.value):e.value},getChecked:function(e){return e.checkedLink?(i(e),e.checkedLink.value):e.checked},executeOnChange:function(e,t){return e.valueLink?(o(e),e.valueLink.requestChange(t.target.value)):e.checkedLink?(i(e),e.checkedLink.requestChange(t.target.checked)):e.onChange?e.onChange.call(void 0,t):void 0}};t.exports=m},{112:112,120:120,137:137,142:142,145:145,64:64}],24:[function(e,t,n){"use strict";var r=e(112),o=(e(137),function(e){var t=this;if(t.instancePool.length){var n=t.instancePool.pop();return t.call(n,e),n}return new t(e)}),i=function(e,t){var n=this;if(n.instancePool.length){var r=n.instancePool.pop();return n.call(r,e,t),r}return new n(e,t)},a=function(e,t,n){var r=this;if(r.instancePool.length){var o=r.instancePool.pop();return r.call(o,e,t,n),o}return new r(e,t,n)},s=function(e,t,n,r){var o=this;if(o.instancePool.length){var i=o.instancePool.pop();return o.call(i,e,t,n,r),i}return new o(e,t,n,r)},u=function(e){var t=this;e instanceof t||r("25"),e.destructor(),t.instancePool.length=0||null!=t.is}function h(e){var t=e.type;d(t),this._currentElement=e,this._tag=t.toLowerCase(),this._namespaceURI=null,this._renderedChildren=null,this._previousStyle=null,this._previousStyleCopy=null,this._hostNode=null,this._hostParent=null,this._rootNodeID=0,this._domID=0,this._hostContainerInfo=null,this._wrapperState=null,this._topLevelWrapper=null,this._flags=0}var 
m=e(112),v=e(143),g=e(2),y=e(5),_=e(9),C=e(10),b=e(11),E=e(12),x=e(16),w=e(17),T=e(25),k=e(32),P=e(33),S=e(38),N=e(39),M=e(40),I=e(43),O=(e(58),e(61)),R=e(68),A=(e(129),e(95)),D=(e(137),e(109),e(141),e(118),e(142),k),L=x.deleteListener,U=P.getNodeFromInstance,F=T.listenTo,j=w.registrationNameModules,V={string:!0,number:!0},B="__html",W={children:null,dangerouslySetInnerHTML:null,suppressContentEditableWarning:null},H=11,q={topAbort:"abort",topCanPlay:"canplay",topCanPlayThrough:"canplaythrough",topDurationChange:"durationchange",topEmptied:"emptied",topEncrypted:"encrypted",topEnded:"ended",topError:"error",topLoadedData:"loadeddata",topLoadedMetadata:"loadedmetadata",topLoadStart:"loadstart",topPause:"pause",topPlay:"play",topPlaying:"playing",topProgress:"progress",topRateChange:"ratechange",topSeeked:"seeked",topSeeking:"seeking",topStalled:"stalled",topSuspend:"suspend",topTimeUpdate:"timeupdate",topVolumeChange:"volumechange",topWaiting:"waiting"},K={area:!0,base:!0,br:!0,col:!0,embed:!0,hr:!0,img:!0,input:!0,keygen:!0,link:!0,meta:!0,param:!0,source:!0,track:!0,wbr:!0},z={listing:!0,pre:!0,textarea:!0},Y=v({menuitem:!0},K),X=/^[a-zA-Z][a-zA-Z:_\.\-\d]*$/,Q={},G={}.hasOwnProperty,$=1;h.displayName="ReactDOMComponent",h.Mixin={mountComponent:function(e,t,n,r){this._rootNodeID=$++,this._domID=n._idCounter++,this._hostParent=t,this._hostContainerInfo=n;var 
i=this._currentElement.props;switch(this._tag){case"audio":case"form":case"iframe":case"img":case"link":case"object":case"source":case"video":this._wrapperState={listeners:null},e.getReactMountReady().enqueue(c,this);break;case"input":S.mountWrapper(this,i,t),i=S.getHostProps(this,i),e.getReactMountReady().enqueue(c,this);break;case"option":N.mountWrapper(this,i,t),i=N.getHostProps(this,i);break;case"select":M.mountWrapper(this,i,t),i=M.getHostProps(this,i),e.getReactMountReady().enqueue(c,this);break;case"textarea":I.mountWrapper(this,i,t),i=I.getHostProps(this,i),e.getReactMountReady().enqueue(c,this)}o(this,i);var a,p;null!=t?(a=t._namespaceURI,p=t._tag):n._tag&&(a=n._namespaceURI,p=n._tag),(null==a||a===C.svg&&"foreignobject"===p)&&(a=C.html),a===C.html&&("svg"===this._tag?a=C.svg:"math"===this._tag&&(a=C.mathml)),this._namespaceURI=a;var d;if(e.useCreateElement){var f,h=n._ownerDocument;if(a===C.html)if("script"===this._tag){var m=h.createElement("div"),v=this._currentElement.type;m.innerHTML="<"+v+">",f=m.removeChild(m.firstChild)}else f=i.is?h.createElement(this._currentElement.type,i.is):h.createElement(this._currentElement.type);else f=h.createElementNS(a,this._currentElement.type);P.precacheNode(this,f),this._flags|=D.hasCachedChildNodes,this._hostParent||E.setAttributeForRoot(f),this._updateDOMProperties(null,i,e);var y=_(f);this._createInitialChildren(e,i,r,y),d=y}else{var b=this._createOpenTagMarkupAndPutListeners(e,i),x=this._createContentMarkup(e,i,r);d=!x&&K[this._tag]?b+"/>":b+">"+x+""}switch(this._tag){case"input":e.getReactMountReady().enqueue(s,this),i.autoFocus&&e.getReactMountReady().enqueue(g.focusDOMComponent,this);break;case"textarea":e.getReactMountReady().enqueue(u,this),i.autoFocus&&e.getReactMountReady().enqueue(g.focusDOMComponent,this);break;case"select":case"button":i.autoFocus&&e.getReactMountReady().enqueue(g.focusDOMComponent,this);break;case"option":e.getReactMountReady().enqueue(l,this)}return 
d},_createOpenTagMarkupAndPutListeners:function(e,t){var n="<"+this._currentElement.type;for(var r in t)if(t.hasOwnProperty(r)){var o=t[r];if(null!=o)if(j.hasOwnProperty(r))o&&i(this,r,o,e);else{"style"===r&&(o&&(o=this._previousStyleCopy=v({},t.style)),o=y.createMarkupForStyles(o,this));var a=null;null!=this._tag&&f(this._tag,t)?W.hasOwnProperty(r)||(a=E.createMarkupForCustomAttribute(r,o)):a=E.createMarkupForProperty(r,o),a&&(n+=" "+a)}}return e.renderToStaticMarkup?n:(this._hostParent||(n+=" "+E.createMarkupForRoot()),n+=" "+E.createMarkupForID(this._domID))},_createContentMarkup:function(e,t,n){var r="",o=t.dangerouslySetInnerHTML;if(null!=o)null!=o.__html&&(r=o.__html);else{var i=V[typeof t.children]?t.children:null,a=null!=i?null:t.children;if(null!=i)r=A(i);else if(null!=a){var s=this.mountChildren(a,e,n);r=s.join("")}}return z[this._tag]&&"\n"===r.charAt(0)?"\n"+r:r},_createInitialChildren:function(e,t,n,r){var o=t.dangerouslySetInnerHTML;if(null!=o)null!=o.__html&&_.queueHTML(r,o.__html);else{var i=V[typeof t.children]?t.children:null,a=null!=i?null:t.children;if(null!=i)""!==i&&_.queueText(r,i);else if(null!=a)for(var s=this.mountChildren(a,e,n),u=0;u"},receiveComponent:function(){},getHostNode:function(){return i.getNodeFromInstance(this)},unmountComponent:function(){i.uncacheNode(this)}}),t.exports=a},{143:143,33:33,9:9}],36:[function(e,t,n){"use strict";var r={useCreateElement:!0,useFiber:!1};t.exports=r},{}],37:[function(e,t,n){"use strict";var r=e(8),o=e(33),i={dangerouslyProcessChildrenUpdates:function(e,t){var n=o.getNodeFromInstance(e);r.processUpdates(n,t)}};t.exports=i},{33:33,8:8}],38:[function(e,t,n){"use strict";function r(){this._rootNodeID&&d.updateWrapper(this)}function o(e){return"checkbox"===e.type||"radio"===e.type?null!=e.checked:null!=e.value}function i(e){var t=this._currentElement.props,n=l.executeOnChange(t,e);p.asap(r,this);var o=t.name;if("radio"===t.type&&null!=o){for(var 
i=c.getNodeFromInstance(this),s=i;s.parentNode;)s=s.parentNode;for(var u=s.querySelectorAll("input[name="+JSON.stringify(""+o)+'][type="radio"]'),d=0;dt.end?(n=t.end,r=t.start):(n=t.start,r=t.end),o.moveToElementText(e),o.moveStart("character",n),o.setEndPoint("EndToStart",o),o.moveEnd("character",r-n),o.select()}function s(e,t){if(window.getSelection){var n=window.getSelection(),r=e[c()].length,o=Math.min(t.start,r),i=void 0===t.end?o:Math.min(t.end,r);if(!n.extend&&o>i){var a=i;i=o,o=a}var s=l(e,o),u=l(e,i);if(s&&u){var p=document.createRange();p.setStart(s.node,s.offset),n.removeAllRanges(),o>i?(n.addRange(p),n.extend(u.node,u.offset)):(p.setEnd(u.node,u.offset),n.addRange(p))}}}var u=e(123),l=e(105),c=e(106),p=u.canUseDOM&&"selection"in document&&!("getSelection"in window),d={getOffsets:p?o:i,setOffsets:p?a:s};t.exports=d},{105:105,106:106,123:123}],42:[function(e,t,n){"use strict";var r=e(112),o=e(143),i=e(8),a=e(9),s=e(33),u=e(95),l=(e(137),e(118),function(e){this._currentElement=e,this._stringText=""+e, +this._hostNode=null,this._hostParent=null,this._domID=0,this._mountIndex=0,this._closingComment=null,this._commentNodes=null});o(l.prototype,{mountComponent:function(e,t,n,r){var o=n._idCounter++,i=" react-text: "+o+" ";if(this._domID=o,this._hostParent=t,e.useCreateElement){var l=n._ownerDocument,c=l.createComment(i),p=l.createComment(" /react-text "),d=a(l.createDocumentFragment());return a.queueChild(d,a(c)),this._stringText&&a.queueChild(d,a(l.createTextNode(this._stringText))),a.queueChild(d,a(p)),s.precacheNode(this,c),this._closingComment=p,d}var f=u(this._stringText);return e.renderToStaticMarkup?f:""+f+""},receiveComponent:function(e,t){if(e!==this._currentElement){this._currentElement=e;var n=""+e;if(n!==this._stringText){this._stringText=n;var r=this.getHostNode();i.replaceDelimitedText(r[0],r[1],n)}}},getHostNode:function(){var e=this._commentNodes;if(e)return e;if(!this._closingComment)for(var 
t=s.getNodeFromInstance(this),n=t.nextSibling;;){if(null==n&&r("67",this._domID),8===n.nodeType&&" /react-text "===n.nodeValue){this._closingComment=n;break}n=n.nextSibling}return e=[this._hostNode,this._closingComment],this._commentNodes=e,e},unmountComponent:function(){this._closingComment=null,this._commentNodes=null,s.uncacheNode(this)}}),t.exports=l},{112:112,118:118,137:137,143:143,33:33,8:8,9:9,95:95}],43:[function(e,t,n){"use strict";function r(){this._rootNodeID&&c.updateWrapper(this)}function o(e){var t=this._currentElement.props,n=s.executeOnChange(t,e);return l.asap(r,this),n}var i=e(112),a=e(143),s=e(23),u=e(33),l=e(71),c=(e(137),e(142),{getHostProps:function(e,t){return null!=t.dangerouslySetInnerHTML&&i("91"),a({},t,{value:void 0,defaultValue:void 0,children:""+e._wrapperState.initialValue,onChange:e._wrapperState.onChange})},mountWrapper:function(e,t){var n=s.getValue(t),r=n;if(null==n){var a=t.defaultValue,u=t.children;null!=u&&(null!=a&&i("92"),Array.isArray(u)&&(u.length<=1||i("93"),u=u[0]),a=""+u),null==a&&(a=""),r=a}e._wrapperState={initialValue:""+r,listeners:null,onChange:o.bind(e)}},updateWrapper:function(e){var t=e._currentElement.props,n=u.getNodeFromInstance(e),r=s.getValue(t);if(null!=r){var o=""+r;o!==n.value&&(n.value=o),null==t.defaultValue&&(n.defaultValue=o)}null!=t.defaultValue&&(n.defaultValue=t.defaultValue)},postMountWrapper:function(e){var t=u.getNodeFromInstance(e),n=t.textContent;n===e._wrapperState.initialValue&&(t.value=n)}});t.exports=c},{112:112,137:137,142:142,143:143,23:23,33:33,71:71}],44:[function(e,t,n){"use strict";function r(e,t){"_hostNode"in e||u("33"),"_hostNode"in t||u("33");for(var n=0,r=e;r;r=r._hostParent)n++;for(var o=0,i=t;i;i=i._hostParent)o++;for(;n-o>0;)e=e._hostParent,n--;for(;o-n>0;)t=t._hostParent,o--;for(var a=n;a--;){if(e===t)return e;e=e._hostParent,t=t._hostParent}return null}function o(e,t){"_hostNode"in e||u("35"),"_hostNode"in 
t||u("35");for(;t;){if(t===e)return!0;t=t._hostParent}return!1}function i(e){return"_hostNode"in e||u("36"),e._hostParent}function a(e,t,n){for(var r=[];e;)r.push(e),e=e._hostParent;var o;for(o=r.length;o-- >0;)t(r[o],"captured",n);for(o=0;o0;)n(u[l],"captured",i)}var u=e(112);e(137);t.exports={isAncestor:o,getLowestCommonAncestor:r,getParentInstance:i,traverseTwoPhase:a,traverseEnterLeave:s}},{112:112,137:137}],45:[function(e,t,n){"use strict";var r=e(120),o=e(30),i=o;r.addons&&(r.__SECRET_INJECTED_REACT_DOM_DO_NOT_USE_OR_YOU_WILL_BE_FIRED=i),t.exports=i},{120:120,30:30}],46:[function(e,t,n){"use strict";function r(){this.reinitializeTransaction()}var o=e(143),i=e(71),a=e(89),s=e(129),u={initialize:s,close:function(){d.isBatchingUpdates=!1}},l={initialize:s,close:i.flushBatchedUpdates.bind(i)},c=[l,u];o(r.prototype,a,{getTransactionWrappers:function(){return c}});var p=new r,d={isBatchingUpdates:!1,batchedUpdates:function(e,t,n,r,o,i){var a=d.isBatchingUpdates;return d.isBatchingUpdates=!0,a?e(t,n,r,o,i):p.perform(e,null,t,n,r,o,i)}};t.exports=d},{129:129,143:143,71:71,89:89}],47:[function(e,t,n){"use strict";function r(){x||(x=!0,y.EventEmitter.injectReactEventListener(g),y.EventPluginHub.injectEventPluginOrder(s),y.EventPluginUtils.injectComponentTree(d),y.EventPluginUtils.injectTreeTraversal(h),y.EventPluginHub.injectEventPluginsByName({SimpleEventPlugin:E,EnterLeaveEventPlugin:u,ChangeEventPlugin:a,SelectEventPlugin:b,BeforeInputEventPlugin:i}),y.HostComponent.injectGenericComponentClass(p),y.HostComponent.injectTextComponentClass(m),y.DOMProperty.injectDOMPropertyConfig(o),y.DOMProperty.injectDOMPropertyConfig(l),y.DOMProperty.injectDOMPropertyConfig(C),y.EmptyComponent.injectEmptyComponentFactory(function(e){return new f(e)}),y.Updates.injectReconcileTransaction(_),y.Updates.injectBatchingStrategy(v),y.Component.injectEnvironment(c))}var 
o=e(1),i=e(3),a=e(7),s=e(14),u=e(15),l=e(21),c=e(27),p=e(31),d=e(33),f=e(35),h=e(44),m=e(42),v=e(46),g=e(52),y=e(55),_=e(65),C=e(73),b=e(74),E=e(75),x=!1;t.exports={inject:r}},{1:1,14:14,15:15,21:21,27:27,3:3,31:31,33:33,35:35,42:42,44:44,46:46,52:52,55:55,65:65,7:7,73:73,74:74,75:75}],48:[function(e,t,n){"use strict";var r="function"==typeof Symbol&&Symbol.for&&Symbol.for("react.element")||60103;t.exports=r},{}],49:[function(e,t,n){"use strict";var r,o={injectEmptyComponentFactory:function(e){r=e}},i={create:function(e){return r(e)}};i.injection=o,t.exports=i},{}],50:[function(e,t,n){"use strict";function r(e,t,n){try{t(n)}catch(e){null===o&&(o=e)}}var o=null,i={invokeGuardedCallback:r,invokeGuardedCallbackWithCatch:r,rethrowCaughtError:function(){if(o){var e=o;throw o=null,e}}};t.exports=i},{}],51:[function(e,t,n){"use strict";function r(e){o.enqueueEvents(e),o.processEventQueue(!1)}var o=e(16),i={handleTopLevel:function(e,t,n,i){r(o.extractEvents(e,t,n,i))}};t.exports=i},{16:16}],52:[function(e,t,n){"use strict";function r(e){for(;e._hostParent;)e=e._hostParent;var t=p.getNodeFromInstance(e),n=t.parentNode;return p.getClosestInstanceFromNode(n)}function o(e,t){this.topLevelType=e,this.nativeEvent=t,this.ancestors=[]}function i(e){var t=f(e.nativeEvent),n=p.getClosestInstanceFromNode(t),o=n;do{e.ancestors.push(o),o=o&&r(o)}while(o);for(var i=0;i/," "+i.CHECKSUM_ATTR_NAME+'="'+t+'"$&')},canReuseMarkup:function(e,t){var n=t.getAttribute(i.CHECKSUM_ATTR_NAME);return n=n&&parseInt(n,10),r(e)===n}};t.exports=i},{92:92}],60:[function(e,t,n){"use strict";function r(e,t){for(var n=Math.min(e.length,t.length),r=0;r.":"function"==typeof t?" Instead of passing a class like Foo, pass React.createElement(Foo) or .":null!=t&&void 0!==t.props?" 
This may be caused by unintentionally loading two independent copies of React.":"");var a,s=v.createElement(F,{child:t});if(e){var u=E.get(e);a=u._processChildContext(u._context)}else a=P;var c=d(n);if(c){var p=c._currentElement,h=p.props.child;if(M(h,t)){var m=c._renderedComponent.getPublicInstance(),g=r&&function(){r.call(m)};return j._updateRootComponent(c,s,a,n,g),m}j.unmountComponentAtNode(n)}var y=o(n),_=y&&!!i(y),C=l(n),b=_&&!c&&!C,x=j._renderNewRootComponent(s,n,b,a)._renderedComponent.getPublicInstance();return r&&r.call(x),x},render:function(e,t,n){return j._renderSubtreeIntoContainer(null,e,t,n)},unmountComponentAtNode:function(e){c(e)||f("40");var t=d(e);return t?(delete L[t._instance.rootID],k.batchedUpdates(u,t,e,!1),!0):(l(e),1===e.nodeType&&e.hasAttribute(O),!1)},_mountImageIntoNode:function(e,t,n,i,a){if(c(t)||f("41"),i){var s=o(t);if(x.canReuseMarkup(e,s))return void y.precacheNode(n,s);var u=s.getAttribute(x.CHECKSUM_ATTR_NAME);s.removeAttribute(x.CHECKSUM_ATTR_NAME);var l=s.outerHTML;s.setAttribute(x.CHECKSUM_ATTR_NAME,u);var p=e,d=r(p,l),m=" (client) "+p.substring(d-20,d+20)+"\n (server) "+l.substring(d-20,d+20);t.nodeType===A&&f("42",m)}if(t.nodeType===A&&f("43"),a.useCreateElement){for(;t.lastChild;)t.removeChild(t.lastChild);h.insertTreeBefore(t,e,null)}else N(t,e),y.precacheNode(n,t.firstChild)}};t.exports=j},{108:108,11:11,112:112,114:114,116:116,119:119,120:120,130:130,137:137,142:142,25:25,33:33,34:34,36:36,53:53,57:57,58:58,59:59,66:66,70:70,71:71,9:9}],61:[function(e,t,n){"use strict";function r(e,t,n){return{type:"INSERT_MARKUP",content:e,fromIndex:null,fromNode:null,toIndex:n,afterNode:t}}function o(e,t,n){return{type:"MOVE_EXISTING",content:null,fromIndex:e._mountIndex,fromNode:d.getHostNode(e),toIndex:n,afterNode:t}}function i(e,t){return{type:"REMOVE_NODE",content:null,fromIndex:e._mountIndex,fromNode:t,toIndex:null,afterNode:null}}function 
a(e){return{type:"SET_MARKUP",content:e,fromIndex:null,fromNode:null,toIndex:null,afterNode:null}}function s(e){return{type:"TEXT_CONTENT",content:e,fromIndex:null,fromNode:null,toIndex:null,afterNode:null}}function u(e,t){return t&&(e=e||[],e.push(t)),e}function l(e,t){p.processChildrenUpdates(e,t)}var c=e(112),p=e(28),d=(e(57),e(58),e(119),e(66)),f=e(26),h=(e(129),e(97)),m=(e(137),{Mixin:{_reconcilerInstantiateChildren:function(e,t,n){return f.instantiateChildren(e,t,n)},_reconcilerUpdateChildren:function(e,t,n,r,o,i){var a;return a=h(t,0),f.updateChildren(e,a,n,r,o,this,this._hostContainerInfo,i,0),a},mountChildren:function(e,t,n){var r=this._reconcilerInstantiateChildren(e,t,n);this._renderedChildren=r;var o=[],i=0;for(var a in r)if(r.hasOwnProperty(a)){var s=r[a],u=d.mountComponent(s,t,this,this._hostContainerInfo,n,0);s._mountIndex=i++,o.push(u)}return o},updateTextContent:function(e){var t=this._renderedChildren;f.unmountChildren(t,!1);for(var n in t)t.hasOwnProperty(n)&&c("118");l(this,[s(e)])},updateMarkup:function(e){var t=this._renderedChildren;f.unmountChildren(t,!1);for(var n in t)t.hasOwnProperty(n)&&c("118");l(this,[a(e)])},updateChildren:function(e,t,n){this._updateChildren(e,t,n)},_updateChildren:function(e,t,n){var r=this._renderedChildren,o={},i=[],a=this._reconcilerUpdateChildren(r,e,i,o,t,n);if(a||r){var s,c=null,p=0,f=0,h=0,m=null;for(s in a)if(a.hasOwnProperty(s)){var v=r&&r[s],g=a[s];v===g?(c=u(c,this.moveChild(v,m,p,f)),f=Math.max(v._mountIndex,f),v._mountIndex=p):(v&&(f=Math.max(v._mountIndex,f)),c=u(c,this._mountChildAtIndex(g,i[h],m,p,t,n)),h++),p++,m=d.getHostNode(g)}for(s in o)o.hasOwnProperty(s)&&(c=u(c,this._unmountChild(r[s],o[s])));c&&l(this,c),this._renderedChildren=a}},unmountChildren:function(e){var t=this._renderedChildren;f.unmountChildren(t,e),this._renderedChildren=null},moveChild:function(e,t,n,r){if(e._mountIndex0&&r.length<20?n+" (keys: "+r.join(", ")+")":n}function i(e,t){var n=s.get(e);return n||null}var 
a=e(112),s=(e(119),e(57)),u=(e(58),e(71)),l=(e(137),e(142),{isMounted:function(e){var t=s.get(e);return!!t&&!!t._renderedComponent},enqueueCallback:function(e,t,n){l.validateCallback(t,n);var o=i(e);if(!o)return null;o._pendingCallbacks?o._pendingCallbacks.push(t):o._pendingCallbacks=[t],r(o)},enqueueCallbackInternal:function(e,t){e._pendingCallbacks?e._pendingCallbacks.push(t):e._pendingCallbacks=[t],r(e)},enqueueForceUpdate:function(e){var t=i(e,"forceUpdate");t&&(t._pendingForceUpdate=!0,r(t))},enqueueReplaceState:function(e,t,n){var o=i(e,"replaceState");o&&(o._pendingStateQueue=[t],o._pendingReplaceState=!0,void 0!==n&&null!==n&&(l.validateCallback(n,"replaceState"),o._pendingCallbacks?o._pendingCallbacks.push(n):o._pendingCallbacks=[n]),r(o))},enqueueSetState:function(e,t){var n=i(e,"setState");n&&((n._pendingStateQueue||(n._pendingStateQueue=[])).push(t),r(n))},enqueueElementInternal:function(e,t,n){e._pendingElement=t,e._context=n,r(e)},validateCallback:function(e,t){e&&"function"!=typeof e&&a("122",t,o(e))}});t.exports=l},{112:112,119:119,137:137,142:142,57:57,58:58,71:71}],71:[function(e,t,n){"use strict";function r(){P.ReactReconcileTransaction&&b||c("123")}function o(){this.reinitializeTransaction(),this.dirtyComponentsLength=null,this.callbackQueue=d.getPooled(),this.reconcileTransaction=P.ReactReconcileTransaction.getPooled(!0)}function i(e,t,n,o,i,a){return r(),b.batchedUpdates(e,t,n,o,i,a)}function a(e,t){return e._mountOrder-t._mountOrder}function s(e){var t=e.dirtyComponentsLength;t!==g.length&&c("124",t,g.length),g.sort(a),y++;for(var n=0;n]/;t.exports=o},{}],96:[function(e,t,n){"use strict";function r(e){if(null==e)return null;if(1===e.nodeType)return e;var t=a.get(e);if(t)return t=s(t),t?i.getNodeFromInstance(t):null;"function"==typeof e.render?o("44"):o("45",Object.keys(e))}var o=e(112),i=(e(119),e(33)),a=e(57),s=e(103);e(137),e(142);t.exports=r},{103:103,112:112,119:119,137:137,142:142,33:33,57:57}],97:[function(e,t,n){(function(n){"use 
strict";function r(e,t,n,r){if(e&&"object"==typeof e){var o=e;void 0===o[n]&&null!=t&&(o[n]=t)}}function o(e,t){if(null==e)return e;var n={};return i(e,r,n),n}var i=(e(22),e(117));e(142);void 0!==n&&n.env,t.exports=o}).call(this,void 0)},{117:117,142:142,22:22}],98:[function(e,t,n){"use strict";function r(e,t,n){Array.isArray(e)?e.forEach(t,n):e&&t.call(n,e)}t.exports=r},{}],99:[function(e,t,n){"use strict";function r(e){var t,n=e.keyCode;return"charCode"in e?0===(t=e.charCode)&&13===n&&(t=13):t=n,t>=32||13===t?t:0}t.exports=r},{}],100:[function(e,t,n){"use strict";function r(e){if(e.key){var t=i[e.key]||e.key;if("Unidentified"!==t)return t}if("keypress"===e.type){var n=o(e);return 13===n?"Enter":String.fromCharCode(n)}return"keydown"===e.type||"keyup"===e.type?a[e.keyCode]||"Unidentified":""}var o=e(99),i={Esc:"Escape",Spacebar:" ",Left:"ArrowLeft",Up:"ArrowUp",Right:"ArrowRight",Down:"ArrowDown",Del:"Delete",Win:"OS",Menu:"ContextMenu",Apps:"ContextMenu",Scroll:"ScrollLock",MozPrintableKey:"Unidentified"},a={8:"Backspace",9:"Tab",12:"Clear",13:"Enter",16:"Shift",17:"Control",18:"Alt",19:"Pause",20:"CapsLock",27:"Escape",32:" ",33:"PageUp",34:"PageDown",35:"End",36:"Home",37:"ArrowLeft",38:"ArrowUp",39:"ArrowRight",40:"ArrowDown",45:"Insert",46:"Delete",112:"F1",113:"F2",114:"F3",115:"F4",116:"F5",117:"F6",118:"F7",119:"F8",120:"F9",121:"F10",122:"F11",123:"F12",144:"NumLock",145:"ScrollLock",224:"Meta"};t.exports=r},{99:99}],101:[function(e,t,n){"use strict";function r(e){var t=this,n=t.nativeEvent;if(n.getModifierState)return n.getModifierState(e);var r=i[e];return!!r&&!!n[r]}function o(e){return r}var i={Alt:"altKey",Control:"ctrlKey",Meta:"metaKey",Shift:"shiftKey"};t.exports=o},{}],102:[function(e,t,n){"use strict";function r(e){var t=e.target||e.srcElement||window;return t.correspondingUseElement&&(t=t.correspondingUseElement),3===t.nodeType?t.parentNode:t}t.exports=r},{}],103:[function(e,t,n){"use strict";function r(e){for(var 
t;(t=e._renderedNodeType)===o.COMPOSITE;)e=e._renderedComponent;return t===o.HOST?e._renderedComponent:t===o.EMPTY?null:void 0}var o=e(62);t.exports=r},{62:62}],104:[function(e,t,n){"use strict";function r(e){var t=e&&(o&&e[o]||e[i]);if("function"==typeof t)return t}var o="function"==typeof Symbol&&Symbol.iterator,i="@@iterator";t.exports=r},{}],105:[function(e,t,n){"use strict";function r(e){for(;e&&e.firstChild;)e=e.firstChild;return e}function o(e){for(;e;){if(e.nextSibling)return e.nextSibling;e=e.parentNode}}function i(e,t){for(var n=r(e),i=0,a=0;n;){if(3===n.nodeType){if(a=i+n.textContent.length,i<=t&&a>=t)return{node:n,offset:t-i};i=a}n=r(o(n))}}t.exports=i},{}],106:[function(e,t,n){"use strict";function r(){return!i&&o.canUseDOM&&(i="textContent"in document.documentElement?"textContent":"innerText"),i}var o=e(123),i=null;t.exports=r},{123:123}],107:[function(e,t,n){"use strict";function r(e,t){var n={};return n[e.toLowerCase()]=t.toLowerCase(),n["Webkit"+e]="webkit"+t,n["Moz"+e]="moz"+t,n["ms"+e]="MS"+t,n["O"+e]="o"+t.toLowerCase(),n}function o(e){if(s[e])return s[e];if(!a[e])return e;var t=a[e];for(var n in t)if(t.hasOwnProperty(n)&&n in u)return s[e]=t[n];return""}var i=e(123),a={animationend:r("Animation","AnimationEnd"),animationiteration:r("Animation","AnimationIteration"),animationstart:r("Animation","AnimationStart"),transitionend:r("Transition","TransitionEnd")},s={},u={};i.canUseDOM&&(u=document.createElement("div").style,"AnimationEvent"in window||(delete a.animationend.animation,delete a.animationiteration.animation,delete a.animationstart.animation),"TransitionEvent"in window||delete a.transitionend.transition),t.exports=o},{123:123}],108:[function(e,t,n){"use strict";function r(e){if(e){var t=e.getName();if(t)return" Check the render method of `"+t+"`."}return""}function o(e){return"function"==typeof e&&void 0!==e.prototype&&"function"==typeof e.prototype.mountComponent&&"function"==typeof e.prototype.receiveComponent}function i(e,t){var 
n;if(null===e||!1===e)n=l.create(i);else if("object"==typeof e){var s=e,u=s.type;if("function"!=typeof u&&"string"!=typeof u){var d="";d+=r(s._owner),a("130",null==u?u:typeof u,d)}"string"==typeof s.type?n=c.createInternalComponent(s):o(s.type)?(n=new s.type(s),n.getHostNode||(n.getHostNode=n.getNativeNode)):n=new p(s)}else"string"==typeof e||"number"==typeof e?n=c.createInstanceForText(e):a("131",typeof e);return n._mountIndex=0,n._mountImage=null,n}var a=e(112),s=e(143),u=e(29),l=e(49),c=e(54),p=(e(121),e(137),e(142),function(e){this.construct(e)});s(p.prototype,u,{_instantiateReactComponent:i}),t.exports=i},{112:112,121:121,137:137,142:142,143:143,29:29,49:49,54:54}],109:[function(e,t,n){"use strict";function r(e,t){if(!i.canUseDOM||t&&!("addEventListener"in document))return!1;var n="on"+e,r=n in document;if(!r){var a=document.createElement("div");a.setAttribute(n,"return;"),r="function"==typeof a[n]}return!r&&o&&"wheel"===e&&(r=document.implementation.hasFeature("Events.wheel","3.0")),r}var o,i=e(123);i.canUseDOM&&(o=document.implementation&&document.implementation.hasFeature&&!0!==document.implementation.hasFeature("","")),t.exports=r},{123:123}],110:[function(e,t,n){"use strict";function r(e){var t=e&&e.nodeName&&e.nodeName.toLowerCase();return"input"===t?!!o[e.type]:"textarea"===t}var o={color:!0,date:!0,datetime:!0,"datetime-local":!0,email:!0,month:!0,number:!0,password:!0,range:!0,search:!0,tel:!0,text:!0,time:!0,url:!0,week:!0};t.exports=r},{}],111:[function(e,t,n){"use strict";function r(e){return'"'+o(e)+'"'}var o=e(95);t.exports=r},{95:95}],112:[function(e,t,n){"use strict";function r(e){for(var t=arguments.length-1,n="Minified React error #"+e+"; visit http://facebook.github.io/react/docs/error-decoder.html?invariant="+e,r=0;r]/,u=e(93),l=u(function(e,t){if(e.namespaceURI!==i.svg||"innerHTML"in e)e.innerHTML=t;else{r=r||document.createElement("div"),r.innerHTML=""+t+"";for(var 
n=r.firstChild;n.firstChild;)e.appendChild(n.firstChild)}});if(o.canUseDOM){var c=document.createElement("div");c.innerHTML=" ",""===c.innerHTML&&(l=function(e,t){if(e.parentNode&&e.parentNode.replaceChild(e,e),a.test(t)||"<"===t[0]&&s.test(t)){e.innerHTML=String.fromCharCode(65279)+t;var n=e.firstChild;1===n.data.length?e.removeChild(n):n.deleteData(0,1)}else e.innerHTML=t}),c=null}t.exports=l},{10:10,123:123,93:93}],115:[function(e,t,n){"use strict";var r=e(123),o=e(95),i=e(114),a=function(e,t){if(t){var n=e.firstChild;if(n&&n===e.lastChild&&3===n.nodeType)return void(n.nodeValue=t)}e.textContent=t};r.canUseDOM&&("textContent"in document.documentElement||(a=function(e,t){if(3===e.nodeType)return void(e.nodeValue=t);i(e,o(t))})),t.exports=a},{114:114,123:123,95:95}],116:[function(e,t,n){"use strict";function r(e,t){var n=null===e||!1===e,r=null===t||!1===t;if(n||r)return n===r;var o=typeof e,i=typeof t;return"string"===o||"number"===o?"string"===i||"number"===i:"object"===i&&e.type===t.type&&e.key===t.key}t.exports=r},{}],117:[function(e,t,n){"use strict";function r(e,t){return e&&"object"==typeof e&&null!=e.key?l.escape(e.key):t.toString(36)}function o(e,t,n,i){var d=typeof e;if("undefined"!==d&&"boolean"!==d||(e=null),null===e||"string"===d||"number"===d||"object"===d&&e.$$typeof===s)return n(i,e,""===t?c+r(e,0):t),1;var f,h,m=0,v=""===t?c:t+p;if(Array.isArray(e))for(var g=0;g":"<"+e+">",s[e]=!a.firstChild),s[e]?d[e]:null}var o=e(123),i=e(137),a=o.canUseDOM?document.createElement("div"):null,s={},u=[1,'"],l=[1,"","
"],c=[3,"","
"],p=[1,'',""],d={"*":[1,"?
","
"],area:[1,"",""],col:[2,"","
"],legend:[1,"
","
"],param:[1,"",""],tr:[2,"","
"],optgroup:u,option:u,caption:l,colgroup:l,tbody:l,tfoot:l,thead:l,td:c,th:c};["circle","clipPath","defs","ellipse","g","image","line","linearGradient","mask","path","pattern","polygon","polyline","radialGradient","rect","stop","text","tspan"].forEach(function(e){d[e]=p,s[e]=!0}),t.exports=r},{123:123,137:137}],134:[function(e,t,n){"use strict";function r(e){return e.Window&&e instanceof e.Window?{x:e.pageXOffset||e.document.documentElement.scrollLeft,y:e.pageYOffset||e.document.documentElement.scrollTop}:{x:e.scrollLeft,y:e.scrollTop}}t.exports=r},{}],135:[function(e,t,n){"use strict";function r(e){return e.replace(o,"-$1").toLowerCase()}var o=/([A-Z])/g;t.exports=r},{}],136:[function(e,t,n){"use strict";function r(e){return o(e).replace(i,"-ms-")}var o=e(135),i=/^ms-/;t.exports=r},{135:135}],137:[function(e,t,n){"use strict";function r(e,t,n,r,i,a,s,u){if(o(t),!e){var l;if(void 0===t)l=new Error("Minified exception occurred; use the non-minified dev environment for the full error message and additional helpful warnings.");else{var c=[n,r,i,a,s,u],p=0;l=new Error(t.replace(/%s/g,function(){return c[p++]})),l.name="Invariant Violation"}throw l.framesToPop=1,l}}var o=function(e){};t.exports=r},{}],138:[function(e,t,n){"use strict";function r(e){var t=e?e.ownerDocument||e:document,n=t.defaultView||window;return!(!e||!("function"==typeof n.Node?e instanceof n.Node:"object"==typeof e&&"number"==typeof e.nodeType&&"string"==typeof e.nodeName))}t.exports=r},{}],139:[function(e,t,n){"use strict";function r(e){return o(e)&&3==e.nodeType}var o=e(138);t.exports=r},{138:138}],140:[function(e,t,n){"use strict";function r(e){var t={};return function(n){return t.hasOwnProperty(n)||(t[n]=e.call(this,n)),t[n]}}t.exports=r},{}],141:[function(e,t,n){"use strict";function r(e,t){return e===t?0!==e||0!==t||1/e==1/t:e!==e&&t!==t}function o(e,t){if(r(e,t))return!0;if("object"!=typeof e||null===e||"object"!=typeof t||null===t)return!1;var 
n=Object.keys(e),o=Object.keys(t);if(n.length!==o.length)return!1;for(var a=0;a 0x10FFFF || // not a valid Unicode code point + floor(codePoint) != codePoint // not an integer + ) { + throw RangeError('Invalid code point: ' + codePoint); + } + if (codePoint <= 0xFFFF) { // BMP code point + codeUnits.push(codePoint); + } else { // Astral code point; split in surrogate halves + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + codePoint -= 0x10000; + highSurrogate = (codePoint >> 10) + 0xD800; + lowSurrogate = (codePoint % 0x400) + 0xDC00; + codeUnits.push(highSurrogate, lowSurrogate); + } + if (index + 1 == length || codeUnits.length > MAX_SIZE) { + result += stringFromCharCode.apply(null, codeUnits); + codeUnits.length = 0; + } + } + return result; + }; + if (defineProperty) { + defineProperty(String, 'fromCodePoint', { + 'value': fromCodePoint, + 'configurable': true, + 'writable': true + }); + } else { + String.fromCodePoint = fromCodePoint; + } + }()); +} + +/*! http://mths.be/codepointat v0.1.0 by @mathias */ +if (!String.prototype.codePointAt) { + (function() { + 'use strict'; // needed to support `apply`/`call` with `undefined`/`null` + var codePointAt = function(position) { + if (this == null) { + throw TypeError(); + } + var string = String(this); + var size = string.length; + // `ToInteger` + var index = position ? 
Number(position) : 0; + if (index != index) { // better `isNaN` + index = 0; + } + // Account for out-of-bounds indices: + if (index < 0 || index >= size) { + return undefined; + } + // Get the first code unit + var first = string.charCodeAt(index); + var second; + if ( // check if it’s the start of a surrogate pair + first >= 0xD800 && first <= 0xDBFF && // high surrogate + size > index + 1 // there is a next code unit + ) { + second = string.charCodeAt(index + 1); + if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + return (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000; + } + } + return first; + }; + if (Object.defineProperty) { + Object.defineProperty(String.prototype, 'codePointAt', { + 'value': codePointAt, + 'configurable': true, + 'writable': true + }); + } else { + String.prototype.codePointAt = codePointAt; + } + }()); +} + +function registerAsciinemaPlayerElement() { + var AsciinemaPlayerProto = Object.create(HTMLElement.prototype); + + function merge() { + var merged = {}; + for (var i=0; i>>0),ma=0;function na(a,b,c){return a.call.apply(a.bind,arguments)} +function oa(a,b,c){if(!a)throw Error();if(2b?1:0};var ua=Array.prototype.indexOf?function(a,b,c){return Array.prototype.indexOf.call(a,b,c)}:function(a,b,c){c=null==c?0:0>c?Math.max(0,a.length+c):c;if(ca(a))return ca(b)&&1==b.length?a.indexOf(b,c):-1;for(;cb?null:ca(a)?a.charAt(b):a[b]}function ya(a,b){var c=ua(a,b),d;(d=0<=c)&&Array.prototype.splice.call(a,c,1);return d}function za(a,b){a.sort(b||Aa)}function Ca(a,b){for(var c=Array(a.length),d=0;db?1:a2*this.Fc&&Na(this),!0):!1};function Na(a){if(a.Fc!=a.ib.length){for(var b=0,c=0;ba){var b=Ra[a];if(b)return b}b=new Qa([a|0],0>a?-1:0);-128<=a&&128>a&&(Ra[a]=b);return b}function Ta(a){if(isNaN(a)||!isFinite(a))return Ua;if(0>a)return Ta(-a).kb();for(var b=[],c=1,d=0;a>=c;d++)b[d]=a/c|0,c*=Va;return new Qa(b,0)}var 
Va=4294967296,Ua=Sa(0),Wa=Sa(1),Xa=Sa(16777216);g=Qa.prototype; +g.Of=function(){return 0a||36>>0).toString(a);c=e;if(c.hc())return f+d;for(;6>f.length;)f="0"+f;d=""+f+d}};function Ya(a,b){return 0>b?0:bthis.compare(Xa)};g.Ve=function(a){return 0>=this.compare(a)};g.compare=function(a){a=this.ze(a);return a.Eb()?-1:a.hc()?0:1};g.kb=function(){return this.Hf().add(Wa)}; +g.add=function(a){for(var b=Math.max(this.Ma.length,a.Ma.length),c=[],d=0,e=0;e<=b;e++){var f=d+(Ya(this,e)&65535)+(Ya(a,e)&65535),h=(f>>>16)+(Ya(this,e)>>>16)+(Ya(a,e)>>>16);d=h>>>16;f&=65535;h&=65535;c[e]=h<<16|f}return new Qa(c,c[c.length-1]&-2147483648?-1:0)};g.ze=function(a){return this.add(a.kb())}; +g.multiply=function(a){if(this.hc()||a.hc())return Ua;if(this.Eb())return a.Eb()?this.kb().multiply(a.kb()):this.kb().multiply(a).kb();if(a.Eb())return this.multiply(a.kb()).kb();if(this.Ue()&&a.Ue())return Ta(this.vd()*a.vd());for(var b=this.Ma.length+a.Ma.length,c=[],d=0;d<2*b;d++)c[d]=0;for(d=0;d>>16,h=Ya(this,d)&65535,k=Ya(a,e)>>>16,l=Ya(a,e)&65535;c[2*d+2*e]+=h*l;ab(c,2*d+2*e);c[2*d+2*e+1]+=f*l;ab(c,2*d+2*e+1);c[2*d+2*e+1]+= +h*k;ab(c,2*d+2*e+1);c[2*d+2*e+2]+=f*k;ab(c,2*d+2*e+2)}for(d=0;d>>16,a[b]&=65535,b++} +function Za(a,b){if(b.hc())throw Error("division by zero");if(a.hc())return Ua;if(a.Eb())return b.Eb()?Za(a.kb(),b.kb()):Za(a.kb(),b).kb();if(b.Eb())return Za(a,b.kb()).kb();if(30=f?1:Math.pow(2,f-48);h=Ta(e);for(var k=h.multiply(b);k.Eb()||k.xf(d);)e-=f,h=Ta(e),k=h.multiply(b);h.hc()&&(h=Wa);c=c.add(h);d=d.ze(k)}return c}g.Hf=function(){for(var a=this.Ma.length,b=[],c=0;c>5;a%=32;for(var c=this.Ma.length+b+(0>>32-a:Ya(this,e-b);return new Qa(d,this.Lc)}; +g.ad=function(a){var b=a>>5;a%=32;for(var c=this.Ma.length-b,d=[],e=0;e>>a|Ya(this,e+b+1)<<32-a:Ya(this,e+b);return new Qa(d,this.Lc)};function cb(a,b){null!=a&&this.append.apply(this,arguments)}g=cb.prototype;g.xc="";g.set=function(a){this.xc=""+a};g.append=function(a,b,c){this.xc+=String(a);if(null!=b)for(var 
d=1;d>>16&65535)*d+c*(b>>>16&65535)<<16>>>0)|0};function hd(a){a=gd(a|0,-862048943);return gd(a<<15|a>>>-15,461845907)} +function id(a,b){var c=(a|0)^(b|0);return gd(c<<13|c>>>-13,5)+-430675100|0}function jd(a,b){var c=(a|0)^b;c=gd(c^c>>>16,-2048144789);c=gd(c^c>>>13,-1028477387);return c^c>>>16}function kd(a){a:{var b=1;for(var c=0;;)if(b>2)}function qd(a){return a instanceof rd} +function sd(a,b){if(a.Zb===b.Zb)return 0;var c=wb(a.fb);if(t(c?b.fb:c))return-1;if(t(a.fb)){if(wb(b.fb))return 1;c=Aa(a.fb,b.fb);return 0===c?Aa(a.name,b.name):c}return Aa(a.name,b.name)}function rd(a,b,c,d,e){this.fb=a;this.name=b;this.Zb=c;this.Oc=d;this.hb=e;this.m=2154168321;this.J=4096}g=rd.prototype;g.toString=function(){return this.Zb};g.equiv=function(a){return this.K(null,a)};g.K=function(a,b){return b instanceof rd?this.Zb===b.Zb:!1}; +g.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return D.c(c,this);case 3:return D.l(c,this,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.c=function(a,c){return D.c(c,this)};a.l=function(a,c,d){return D.l(c,this,d)};return a}();g.apply=function(a,b){return this.call.apply(this,[this].concat(Gb(b)))};g.h=function(a){return D.c(a,this)};g.c=function(a,b){return D.l(a,this,b)};g.P=function(){return this.hb}; +g.T=function(a,b){return new rd(this.fb,this.name,this.Zb,this.Oc,b)};g.U=function(){var a=this.Oc;return null!=a?a:this.Oc=a=pd(kd(this.name),nd(this.fb))};g.hd=function(){return this.name};g.jd=function(){return this.fb};g.R=function(a,b){return Jc(b,this.Zb)};var td=function td(a){switch(arguments.length){case 1:return td.h(arguments[0]);case 2:return td.c(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",v.h(arguments.length)].join(""));}}; +td.h=function(a){if(a instanceof rd)return a;var b=a.indexOf("/");return 1>b?td.c(null,a):td.c(a.substring(0,b),a.substring(b+1,a.length))};td.c=function(a,b){var c=null!=a?[v.h(a),"/",v.h(b)].join(""):b;return new 
rd(a,b,c,null,null)};td.L=2;function ud(a){return null!=a?a.J&131072||q===a.Tf?!0:a.J?!1:Ab(cd,a):Ab(cd,a)} +function E(a){if(null==a)return null;if(null!=a&&(a.m&8388608||q===a.Pe))return a.S(null);if(vb(a)||"string"===typeof a)return 0===a.length?null:new Jb(a,0,null);if(Ab(Bc,a))return Cc(a);throw Error([v.h(a)," is not ISeqable"].join(""));}function y(a){if(null==a)return null;if(null!=a&&(a.m&64||q===a.G))return a.Ia(null);a=E(a);return null==a?null:Wb(a)}function vd(a){return null!=a?null!=a&&(a.m&64||q===a.G)?a.bb(null):(a=E(a))?Yb(a):wd:wd} +function z(a){return null==a?null:null!=a&&(a.m&128||q===a.Id)?a.Ka(null):E(vd(a))}var G=function G(a){switch(arguments.length){case 1:return G.h(arguments[0]);case 2:return G.c(arguments[0],arguments[1]);default:for(var c=[],d=arguments.length,e=0;;)if(e=d)return-1;!(0c&&(c+=d,c=0>c?0:c);for(;;)if(cc?d+c:c;for(;;)if(0<=c){if(G.c(Vd(a,c),b))return c;--c}else return-1}function Yd(a,b){this.o=a;this.i=b} +Yd.prototype.ja=function(){return this.ia?0:a};g.Rc=function(){var a=this.W(null);return 0d)c=1;else if(0===c)c=0;else a:for(d=0;;){var e=Ke(Vd(a,d),Vd(b,d));if(0===e&&d+1>1&1431655765;a=(a&858993459)+(a>>2&858993459);return 16843009*(a+(a>>4)&252645135)>>24} +var v=function v(a){switch(arguments.length){case 0:return v.B();case 1:return v.h(arguments[0]);default:for(var c=[],d=arguments.length,e=0;;)if(ed:e))c[d]=a.next(),d+=1;else return qf(new nf(c,0,d),Rf.h?Rf.h(a):Rf.call(null,a))}else return null},null,null)};function Sf(a,b,c,d,e,f){this.buffer=a;this.ub=b;this.pe=c;this.Rb=d;this.ye=e;this.Gf=f} +Sf.prototype.step=function(){if(this.ub!==Nf)return!0;for(;;)if(this.ub===Nf)if(this.buffer.Td()){if(this.pe)return!1;if(this.ye.ja()){if(this.Gf)var a=P(this.Rb,ae(null,this.ye.next()));else a=this.ye.next(),a=this.Rb.c?this.Rb.c(null,a):this.Rb.call(null,null,a);Hd(a)&&(this.Rb.h?this.Rb.h(null):this.Rb.call(null,null),this.pe=!0)}else this.Rb.h?this.Rb.h(null):this.Rb.call(null,null),this.pe=!0}else 
this.ub=this.buffer.remove();else return!0};Sf.prototype.ja=function(){return this.step()}; +Sf.prototype.next=function(){if(this.ja()){var a=this.ub;this.ub=Nf;return a}throw Error("No such element");};Sf.prototype.remove=function(){return Error("Unsupported operation")};Sf.prototype[Fb]=function(){return yd(this)}; +function Tf(a,b){var c=new Sf(Qf,Nf,!1,null,b,!1);c.Rb=function(){var b=function(a){return function(){function b(b,c){a.buffer=a.buffer.add(c);return b}var c=null;c=function(a,c){switch(arguments.length){case 0:return null;case 1:return a;case 2:return b.call(this,a,c)}throw Error("Invalid arity: "+(arguments.length-1));};c.B=function(){return null};c.h=function(a){return a};c.c=b;return c}()}(c);return a.h?a.h(b):a.call(null,b)}();return c} +function Uf(a,b){var c=Kf(b);c=Tf(a,c);c=Rf(c);return t(c)?c:wd}function Vf(a,b){for(;;){if(null==E(b))return!0;var c=y(b);c=a.h?a.h(c):a.call(null,c);if(t(c)){c=a;var d=z(b);a=c;b=d}else return!1}}function Wf(a,b){for(;;)if(E(b)){var c=y(b);c=a.h?a.h(c):a.call(null,c);if(t(c))return c;c=a;var d=z(b);a=c;b=d}else return null}function Xf(a){if(Ge(a))return 0===(a&1);throw Error(["Argument must be an integer: ",v.h(a)].join(""));} +function Yf(a){return function(){function b(b,c){return wb(a.c?a.c(b,c):a.call(null,b,c))}function c(b){return wb(a.h?a.h(b):a.call(null,b))}function d(){return wb(a.B?a.B():a.call(null))}var e=null,f=function(){function b(a,b,d){var e=null;if(2a?0:a-1>>>5<<5}function Jg(a,b,c){for(;;){if(0===b)return c;var d=Gg(a);d.o[0]=c;c=d;b-=5}} +var Kg=function Kg(a,b,c,d){var f=Hg(c),h=a.F-1>>>b&31;5===b?f.o[h]=d:(c=c.o[h],null!=c?(b-=5,a=Kg.M?Kg.M(a,b,c,d):Kg.call(null,a,b,c,d)):a=Jg(null,b-5,d),f.o[h]=a);return f};function Lg(a,b){throw Error(["No item ",v.h(a)," in vector of length ",v.h(b)].join(""));}function Mg(a,b){if(b>=Ig(a))return a.fa;for(var c=a.root,d=a.shift;;)if(0>>d&31];d=e}else return c.o} +var Ng=function Ng(a,b,c,d,e){var h=Hg(c);if(0===b)h.o[d&31]=e;else{var 
k=d>>>b&31;b-=5;c=c.o[k];a=Ng.Z?Ng.Z(a,b,c,d,e):Ng.call(null,a,b,c,d,e);h.o[k]=a}return h},Og=function Og(a,b,c){var e=a.F-2>>>b&31;if(5=this.F)a=new Jb(this.fa,0,null);else{a:{a=this.root;for(var b=this.shift;;)if(0this.F-Ig(this)){for(var c=this.fa.length,d=Array(c+1),e=0;;)if(e>>5>1<b)return new R(null,b,5,T,a,null);for(var c=32,d=(new R(null,32,5,T,a.slice(0,32),null)).Pc(null);;)if(cb||this.end<=this.start+b?Lg(b,this.end-this.start):A.c(this.Ja,this.start+b)};g.ka=function(a,b,c){return 0>b||this.end<=this.start+b?c:A.l(this.Ja,this.start+b,c)}; +g.dc=function(a,b,c){a=this.start+b;if(0>b||this.end+1<=a)throw Error(["Index ",v.h(b)," out of bounds [0,",v.h(this.W(null)),"]"].join(""));b=this.meta;c=K.l(this.Ja,a,c);var d=this.end;a+=1;return Zg(b,c,this.start,d>a?d:a,null)};g.ba=function(){return null!=this.Ja&&q===this.Ja.fe?Qg(this.Ja,this.start,this.end):new Jf(Hf,this)};g.P=function(){return this.meta};g.W=function(){return this.end-this.start};g.Ac=function(){return A.c(this.Ja,this.end-1)}; +g.Bc=function(){if(this.start===this.end)throw Error("Can't pop empty vector");return Zg(this.meta,this.Ja,this.start,this.end-1,null)};g.Rc=function(){return this.start!==this.end?new Zd(this,this.end-this.start-1,null):null};g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)};g.K=function(a,b){return $d(this,b)};g.oa=function(){return tc(he,this.meta)};g.Fa=function(a,b){return null!=this.Ja&&q===this.Ja.fe?Rg(this.Ja,b,this.start,this.end):Kd(this,b)}; +g.Ga=function(a,b,c){return null!=this.Ja&&q===this.Ja.fe?Sg(this.Ja,b,c,this.start,this.end):Ld(this,b,c)};g.O=function(a,b,c){if("number"===typeof b)return this.dc(null,b,c);throw Error("Subvec's key for assoc must be a number.");};g.S=function(){var a=this;return function(b){return function e(d){return d===a.end?null:ae(A.c(a.Ja,d),new kf(null,function(){return function(){return e(d+1)}}(b),null,null))}}(this)(a.start)};g.T=function(a,b){return Zg(b,this.Ja,this.start,this.end,this.w)}; 
+g.X=function(a,b){return Zg(this.meta,qc(this.Ja,this.end,b),this.start,this.end+1,null)};g.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return this.$(null,c);case 3:return this.ka(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.c=function(a,c){return this.$(null,c)};a.l=function(a,c,d){return this.ka(null,c,d)};return a}();g.apply=function(a,b){return this.call.apply(this,[this].concat(Gb(b)))};g.h=function(a){return this.$(null,a)}; +g.c=function(a,b){return this.ka(null,a,b)};Yg.prototype[Fb]=function(){return yd(this)};function Zg(a,b,c,d,e){for(;;)if(b instanceof Yg)c=b.start+c,d=b.start+d,b=b.Ja;else{if(!ze(b))throw Error("v must satisfy IVector");var f=H(b);if(0>c||0>d||c>f||d>f)throw Error("Index out of bounds");return new Yg(a,b,c,d,e)}}function $g(a,b){return a===b.la?b:new Fg(a,Gb(b.o))} +var ah=function ah(a,b,c,d){c=$g(a.root.la,c);var f=a.F-1>>>b&31;if(5===b)a=d;else{var h=c.o[f];null!=h?(b-=5,a=ah.M?ah.M(a,b,h,d):ah.call(null,a,b,h,d)):a=Jg(a.root.la,b-5,d)}c.o[f]=a;return c};function Tg(a,b,c,d){this.F=a;this.shift=b;this.root=c;this.fa=d;this.J=88;this.m=275}g=Tg.prototype; +g.Dc=function(a,b){if(this.root.la){if(32>this.F-Ig(this))this.fa[this.F&31]=b;else{var c=new Fg(this.root.la,this.fa),d=[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null];d[0]=b;this.fa=d;if(this.F>>>5>1<>>d&31,m=k(d-5,f.o[p]);f.o[p]=m}return f}}(a)(a.shift,a.root)}();a.root=d}return a}if(b===a.F)return a.Dc(null,c);throw Error(["Index ",v.h(b)," out of bounds for TransientVector of length",v.h(a.F)].join(""));}throw Error("assoc! 
after persistent!");} +g.W=function(){if(this.root.la)return this.F;throw Error("count after persistent!");};g.$=function(a,b){if(this.root.la)return(0<=b&&b=c)return new r(this.meta,this.F-1,d,null);G.c(b,this.o[e])||(d[f]=this.o[e],d[f+1]=this.o[e+1],f+=2);e+=2}}else return this}; +g.O=function(a,b,c){a=ih(this.o,b);if(-1===a){if(this.Fb?4:2*(b+1));Be(this.o,0,c,0,2*b);return new xh(a,this.na,c)};g.qd=function(){return yh(this.o,0,null)};g.Jc=function(a,b){return vh(this.o,a,b)};g.sc=function(a,b,c,d){var e=1<<(b>>>a&31);if(0===(this.na&e))return d;var f=$e(this.na&e-1);e=this.o[2*f];f=this.o[2*f+1];return null==e?f.sc(a+5,b,c,d):rh(c,e)?f:d}; +g.Kb=function(a,b,c,d,e,f){var h=1<<(c>>>b&31),k=$e(this.na&h-1);if(0===(this.na&h)){var l=$e(this.na);if(2*l>>b&31]=zh.Kb(a,b+5,c,d,e,f);for(e=d=0;;)if(32>d)0!== +(this.na>>>d&1)&&(k[d]=null!=this.o[e]?zh.Kb(a,b+5,od(this.o[e]),this.o[e],this.o[e+1],f):this.o[e+1],e+=2),d+=1;else break;return new Ah(a,l+1,k)}b=Array(2*(l+4));Be(this.o,0,b,0,2*k);b[2*k]=d;b[2*k+1]=e;Be(this.o,2*k,b,2*(k+1),2*(l-k));f.H=!0;a=this.Gc(a);a.o=b;a.na|=h;return a}l=this.o[2*k];h=this.o[2*k+1];if(null==l)return l=h.Kb(a,b+5,c,d,e,f),l===h?this:uh(this,a,2*k+1,l);if(rh(d,l))return e===h?this:uh(this,a,2*k+1,e);f.H=!0;f=b+5;b=od(l);if(b===c)e=new Bh(null,b,2,[l,h,d,e]);else{var p=new qh; +e=zh.Kb(a,f,b,l,h,p).Kb(a,f,c,d,e,p)}d=2*k;k=2*k+1;a=this.Gc(a);a.o[d]=null;a.o[k]=e;return a}; +g.Jb=function(a,b,c,d,e){var f=1<<(b>>>a&31),h=$e(this.na&f-1);if(0===(this.na&f)){var k=$e(this.na);if(16<=k){h=[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null];h[b>>>a&31]=zh.Jb(a+5,b,c,d,e);for(d=c=0;;)if(32>c)0!==(this.na>>>c&1)&&(h[c]=null!=this.o[d]?zh.Jb(a+5,od(this.o[d]),this.o[d],this.o[d+1],e):this.o[d+1],d+=2),c+=1;else break;return new Ah(null,k+1,h)}a=Array(2*(k+1));Be(this.o, 
+0,a,0,2*h);a[2*h]=c;a[2*h+1]=d;Be(this.o,2*h,a,2*(h+1),2*(k-h));e.H=!0;return new xh(null,this.na|f,a)}var l=this.o[2*h];f=this.o[2*h+1];if(null==l)return k=f.Jb(a+5,b,c,d,e),k===f?this:new xh(null,this.na,sh(this.o,2*h+1,k));if(rh(c,l))return d===f?this:new xh(null,this.na,sh(this.o,2*h+1,d));e.H=!0;e=this.na;k=this.o;a+=5;var p=od(l);if(p===b)c=new Bh(null,p,2,[l,f,c,d]);else{var m=new qh;c=zh.Jb(a,p,l,f,m).Jb(a,b,c,d,m)}a=2*h;h=2*h+1;d=Gb(k);d[a]=null;d[h]=c;return new xh(null,e,d)}; +g.rd=function(a,b,c){var d=1<<(b>>>a&31);if(0===(this.na&d))return this;var e=$e(this.na&d-1),f=this.o[2*e],h=this.o[2*e+1];return null==f?(a=h.rd(a+5,b,c),a===h?this:null!=a?new xh(null,this.na,sh(this.o,2*e+1,a)):this.na===d?null:new xh(null,this.na^d,th(this.o,e))):rh(c,f)?new xh(null,this.na^d,th(this.o,e)):this};g.ba=function(){return new wh(this.o,0,null,null)};var zh=new xh(null,0,[]);function Ch(a,b,c){this.o=a;this.i=b;this.Lb=c} +Ch.prototype.ja=function(){for(var a=this.o.length;;){if(null!=this.Lb&&this.Lb.ja())return!0;if(this.i>>a&31];return null!=e?e.sc(a+5,b,c,d):d};g.Kb=function(a,b,c,d,e,f){var h=c>>>b&31,k=this.o[h];if(null==k)return a=uh(this,a,h,zh.Kb(a,b+5,c,d,e,f)),a.F+=1,a;b=k.Kb(a,b+5,c,d,e,f);return b===k?this:uh(this,a,h,b)}; +g.Jb=function(a,b,c,d,e){var f=b>>>a&31,h=this.o[f];if(null==h)return new Ah(null,this.F+1,sh(this.o,f,zh.Jb(a+5,b,c,d,e)));a=h.Jb(a+5,b,c,d,e);return a===h?this:new Ah(null,this.F,sh(this.o,f,a))}; +g.rd=function(a,b,c){var d=b>>>a&31,e=this.o[d];if(null!=e){a=e.rd(a+5,b,c);if(a===e)d=this;else if(null==a)if(8>=this.F)a:{e=this.o;a=e.length;b=Array(2*(this.F-1));c=0;for(var f=1,h=0;;)if(ca?d:rh(c,this.o[a])?this.o[a+1]:d}; +g.Kb=function(a,b,c,d,e,f){if(c===this.ec){b=Eh(this.o,this.F,d);if(-1===b){if(this.o.length>2*this.F)return b=2*this.F,c=2*this.F+1,a=this.Gc(a),a.o[b]=d,a.o[c]=e,f.H=!0,a.F+=1,a;c=this.o.length;b=Array(c+2);Be(this.o,0,b,0,c);b[c]=d;b[c+1]=e;f.H=!0;d=this.F+1;a===this.la?(this.o=b,this.F=d,a=this):a=new 
Bh(this.la,this.ec,d,b);return a}return this.o[b+1]===e?this:uh(this,a,b+1,e)}return(new xh(a,1<<(this.ec>>>b&31),[null,this,null,null])).Kb(a,b,c,d,e,f)}; +g.Jb=function(a,b,c,d,e){return b===this.ec?(a=Eh(this.o,this.F,c),-1===a?(a=2*this.F,b=Array(a+2),Be(this.o,0,b,0,a),b[a]=c,b[a+1]=d,e.H=!0,new Bh(null,this.ec,this.F+1,b)):G.c(this.o[a+1],d)?this:new Bh(null,this.ec,this.F,sh(this.o,a+1,d))):(new xh(null,1<<(this.ec>>>a&31),[null,this])).Jb(a,b,c,d,e)};g.rd=function(a,b,c){a=Eh(this.o,this.F,c);return-1===a?this:1===this.F?null:new Bh(null,this.ec,this.F-1,th(this.o,Ze(a)))};g.ba=function(){return new wh(this.o,0,null,null)}; +function Fh(a,b,c,d,e){this.meta=a;this.Mb=b;this.i=c;this.s=d;this.w=e;this.m=32374988;this.J=0}g=Fh.prototype;g.toString=function(){return fd(this)};g.equiv=function(a){return this.K(null,a)};g.indexOf=function(){var a=null;a=function(a,c){switch(arguments.length){case 1:return Ud(this,a,0);case 2:return Ud(this,a,c)}throw Error("Invalid arity: "+(arguments.length-1));};a.h=function(a){return Ud(this,a,0)};a.c=function(a,c){return Ud(this,a,c)};return a}(); +g.lastIndexOf=function(){function a(a){return Xd(this,a,H(this))}var b=null;b=function(b,d){switch(arguments.length){case 1:return a.call(this,b);case 2:return Xd(this,b,d)}throw Error("Invalid arity: "+(arguments.length-1));};b.h=a;b.c=function(a,b){return Xd(this,a,b)};return b}();g.P=function(){return this.meta};g.Ka=function(){return null==this.s?yh(this.Mb,this.i+2,null):yh(this.Mb,this.i,z(this.s))};g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)}; +g.K=function(a,b){return $d(this,b)};g.oa=function(){return tc(wd,this.meta)};g.Fa=function(a,b){return ce(b,this)};g.Ga=function(a,b,c){return de(b,c,this)};g.Ia=function(){return null==this.s?new R(null,2,5,T,[this.Mb[this.i],this.Mb[this.i+1]],null):y(this.s)};g.bb=function(){var a=null==this.s?yh(this.Mb,this.i+2,null):yh(this.Mb,this.i,z(this.s));return null!=a?a:wd};g.S=function(){return 
this};g.T=function(a,b){return new Fh(b,this.Mb,this.i,this.s,this.w)};g.X=function(a,b){return ae(b,this)}; +Fh.prototype[Fb]=function(){return yd(this)};function yh(a,b,c){if(null==c)for(c=a.length;;)if(bthis.F?H(z(this))+1:this.F};g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)};g.K=function(a,b){return $d(this,b)};g.oa=function(){return tc(wd,this.meta)};g.Fa=function(a,b){return ce(b,this)};g.Ga=function(a,b,c){return de(b,c,this)};g.Ia=function(){var a=this.stack;return null==a?null:nc(a)};g.bb=function(){var a=y(this.stack);a=Mh(this.vc?a.right:a.left,z(this.stack),this.vc);return null!=a?new Nh(null,a,this.vc,this.F-1,null):wd};g.S=function(){return this}; +g.T=function(a,b){return new Nh(b,this.stack,this.vc,this.F,this.w)};g.X=function(a,b){return ae(b,this)};Nh.prototype[Fb]=function(){return yd(this)};function Oh(a,b,c){return new Nh(null,Mh(a,null,b),b,c,null)} +function Ph(a,b,c,d){return c instanceof Qh?c.left instanceof Qh?new Qh(c.key,c.H,c.left.bc(),new Rh(a,b,c.right,d,null),null):c.right instanceof Qh?new Qh(c.right.key,c.right.H,new Rh(c.key,c.H,c.left,c.right.left,null),new Rh(a,b,c.right.right,d,null),null):new Rh(a,b,c,d,null):new Rh(a,b,c,d,null)} +function Sh(a,b,c,d){return d instanceof Qh?d.right instanceof Qh?new Qh(d.key,d.H,new Rh(a,b,c,d.left,null),d.right.bc(),null):d.left instanceof Qh?new Qh(d.left.key,d.left.H,new Rh(a,b,c,d.left.left,null),new Rh(d.key,d.H,d.left.right,d.right,null),null):new Rh(a,b,c,d,null):new Rh(a,b,c,d,null)} +function Th(a,b,c,d){if(c instanceof Qh)return new Qh(a,b,c.bc(),d,null);if(d instanceof Rh)return Sh(a,b,c,d.ud());if(d instanceof Qh&&d.left instanceof Rh)return new Qh(d.left.key,d.left.H,new Rh(a,b,c,d.left.left,null),Sh(d.key,d.H,d.left.right,d.right.ud()),null);throw Error("red-black tree invariant violation");} +function Uh(a,b,c,d){if(d instanceof Qh)return new Qh(a,b,c,d.bc(),null);if(c instanceof Rh)return Ph(a,b,c.ud(),d);if(c instanceof Qh&&c.right instanceof Rh)return new 
Qh(c.right.key,c.right.H,Ph(c.key,c.H,c.left.ud(),c.right.left),new Rh(a,b,c.right.right,d,null),null);throw Error("red-black tree invariant violation");} +var Vh=function Vh(a,b,c){var e=null!=a.left?function(){var e=a.left;return Vh.l?Vh.l(e,b,c):Vh.call(null,e,b,c)}():c;if(Hd(e))return e;var f=function(){var c=a.key,f=a.H;return b.l?b.l(e,c,f):b.call(null,e,c,f)}();if(Hd(f))return f;if(null!=a.right){var h=a.right;return Vh.l?Vh.l(h,b,f):Vh.call(null,h,b,f)}return f};function Rh(a,b,c,d,e){this.key=a;this.H=b;this.left=c;this.right=d;this.w=e;this.m=32402207;this.J=0}g=Rh.prototype; +g.lastIndexOf=function(){function a(a){return Xd(this,a,H(this))}var b=null;b=function(b,d){switch(arguments.length){case 1:return a.call(this,b);case 2:return Xd(this,b,d)}throw Error("Invalid arity: "+(arguments.length-1));};b.h=a;b.c=function(a,b){return Xd(this,a,b)};return b}(); +g.indexOf=function(){var a=null;a=function(a,c){switch(arguments.length){case 1:return Ud(this,a,0);case 2:return Ud(this,a,c)}throw Error("Invalid arity: "+(arguments.length-1));};a.h=function(a){return Ud(this,a,0)};a.c=function(a,c){return Ud(this,a,c)};return a}();g.Ee=function(a){return a.He(this)};g.ud=function(){return new Qh(this.key,this.H,this.left,this.right,null)};g.bc=function(){return this};g.De=function(a){return a.Ge(this)};g.replace=function(a,b,c,d){return new Rh(a,b,c,d,null)}; +g.Ge=function(a){return new Rh(a.key,a.H,this,a.right,null)};g.He=function(a){return new Rh(a.key,a.H,a.left,this,null)};g.Jc=function(a,b){return Vh(this,a,b)};g.V=function(a,b){return this.ka(null,b,null)};g.I=function(a,b,c){return this.ka(null,b,c)};g.$=function(a,b){if(0===b)return this.key;if(1===b)return this.H;throw Error("Index out of bounds");};g.ka=function(a,b,c){return 0===b?this.key:1===b?this.H:c};g.dc=function(a,b,c){return(new R(null,2,5,T,[this.key,this.H],null)).dc(null,b,c)}; +g.P=function(){return null};g.W=function(){return 2};g.fd=function(){return this.key};g.gd=function(){return 
this.H};g.Ac=function(){return this.H};g.Bc=function(){return new R(null,1,5,T,[this.key],null)};g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)};g.K=function(a,b){return $d(this,b)};g.oa=function(){return he};g.Fa=function(a,b){return Kd(this,b)};g.Ga=function(a,b,c){return Ld(this,b,c)};g.O=function(a,b,c){return K.l(new R(null,2,5,T,[this.key,this.H],null),b,c)}; +g.yc=function(a,b){return 0===b||1===b};g.S=function(){var a=this.key;return Tb(Tb(wd,this.H),a)};g.T=function(a,b){return tc(new R(null,2,5,T,[this.key,this.H],null),b)};g.X=function(a,b){return new R(null,3,5,T,[this.key,this.H,b],null)}; +g.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return this.$(null,c);case 3:return this.ka(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.c=function(a,c){return this.$(null,c)};a.l=function(a,c,d){return this.ka(null,c,d)};return a}();g.apply=function(a,b){return this.call.apply(this,[this].concat(Gb(b)))};g.h=function(a){return this.$(null,a)};g.c=function(a,b){return this.ka(null,a,b)};Rh.prototype[Fb]=function(){return yd(this)}; +function Qh(a,b,c,d,e){this.key=a;this.H=b;this.left=c;this.right=d;this.w=e;this.m=32402207;this.J=0}g=Qh.prototype;g.lastIndexOf=function(){function a(a){return Xd(this,a,H(this))}var b=null;b=function(b,d){switch(arguments.length){case 1:return a.call(this,b);case 2:return Xd(this,b,d)}throw Error("Invalid arity: "+(arguments.length-1));};b.h=a;b.c=function(a,b){return Xd(this,a,b)};return b}(); +g.indexOf=function(){var a=null;a=function(a,c){switch(arguments.length){case 1:return Ud(this,a,0);case 2:return Ud(this,a,c)}throw Error("Invalid arity: "+(arguments.length-1));};a.h=function(a){return Ud(this,a,0)};a.c=function(a,c){return Ud(this,a,c)};return a}();g.Ee=function(a){return new Qh(this.key,this.H,this.left,a,null)};g.ud=function(){throw Error("red-black tree invariant violation");};g.bc=function(){return new Rh(this.key,this.H,this.left,this.right,null)}; 
+g.De=function(a){return new Qh(this.key,this.H,a,this.right,null)};g.replace=function(a,b,c,d){return new Qh(a,b,c,d,null)};g.Ge=function(a){return this.left instanceof Qh?new Qh(this.key,this.H,this.left.bc(),new Rh(a.key,a.H,this.right,a.right,null),null):this.right instanceof Qh?new Qh(this.right.key,this.right.H,new Rh(this.key,this.H,this.left,this.right.left,null),new Rh(a.key,a.H,this.right.right,a.right,null),null):new Rh(a.key,a.H,this,a.right,null)}; +g.He=function(a){return this.right instanceof Qh?new Qh(this.key,this.H,new Rh(a.key,a.H,a.left,this.left,null),this.right.bc(),null):this.left instanceof Qh?new Qh(this.left.key,this.left.H,new Rh(a.key,a.H,a.left,this.left.left,null),new Rh(this.key,this.H,this.left.right,this.right,null),null):new Rh(a.key,a.H,a.left,this,null)};g.Jc=function(a,b){return Vh(this,a,b)};g.V=function(a,b){return this.ka(null,b,null)};g.I=function(a,b,c){return this.ka(null,b,c)}; +g.$=function(a,b){if(0===b)return this.key;if(1===b)return this.H;throw Error("Index out of bounds");};g.ka=function(a,b,c){return 0===b?this.key:1===b?this.H:c};g.dc=function(a,b,c){return(new R(null,2,5,T,[this.key,this.H],null)).dc(null,b,c)};g.P=function(){return null};g.W=function(){return 2};g.fd=function(){return this.key};g.gd=function(){return this.H};g.Ac=function(){return this.H};g.Bc=function(){return new R(null,1,5,T,[this.key],null)}; +g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)};g.K=function(a,b){return $d(this,b)};g.oa=function(){return he};g.Fa=function(a,b){return Kd(this,b)};g.Ga=function(a,b,c){return Ld(this,b,c)};g.O=function(a,b,c){return K.l(new R(null,2,5,T,[this.key,this.H],null),b,c)};g.yc=function(a,b){return 0===b||1===b};g.S=function(){var a=this.key;return Tb(Tb(wd,this.H),a)};g.T=function(a,b){return tc(new R(null,2,5,T,[this.key,this.H],null),b)}; +g.X=function(a,b){return new R(null,3,5,T,[this.key,this.H,b],null)};g.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 
2:return this.$(null,c);case 3:return this.ka(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.c=function(a,c){return this.$(null,c)};a.l=function(a,c,d){return this.ka(null,c,d)};return a}();g.apply=function(a,b){return this.call.apply(this,[this].concat(Gb(b)))};g.h=function(a){return this.$(null,a)}; +g.c=function(a,b){return this.ka(null,a,b)};Qh.prototype[Fb]=function(){return yd(this)}; +var Wh=function Wh(a,b,c,d,e){if(null==b)return new Qh(c,d,null,null,null);var h=function(){var d=b.key;return a.c?a.c(c,d):a.call(null,c,d)}();if(0===h)return e[0]=b,null;if(0>h)return h=function(){var h=b.left;return Wh.Z?Wh.Z(a,h,c,d,e):Wh.call(null,a,h,c,d,e)}(),null!=h?b.De(h):null;h=function(){var h=b.right;return Wh.Z?Wh.Z(a,h,c,d,e):Wh.call(null,a,h,c,d,e)}();return null!=h?b.Ee(h):null},Xh=function Xh(a,b){if(null==a)return b;if(null==b)return a;if(a instanceof Qh){if(b instanceof Qh){var d= +function(){var d=a.right,f=b.left;return Xh.c?Xh.c(d,f):Xh.call(null,d,f)}();return d instanceof Qh?new Qh(d.key,d.H,new Qh(a.key,a.H,a.left,d.left,null),new Qh(b.key,b.H,d.right,b.right,null),null):new Qh(a.key,a.H,a.left,new Qh(b.key,b.H,d,b.right,null),null)}return new Qh(a.key,a.H,a.left,function(){var d=a.right;return Xh.c?Xh.c(d,b):Xh.call(null,d,b)}(),null)}if(b instanceof Qh)return new Qh(b.key,b.H,function(){var d=b.left;return Xh.c?Xh.c(a,d):Xh.call(null,a,d)}(),b.right,null);d=function(){var d= +a.right,f=b.left;return Xh.c?Xh.c(d,f):Xh.call(null,d,f)}();return d instanceof Qh?new Qh(d.key,d.H,new Rh(a.key,a.H,a.left,d.left,null),new Rh(b.key,b.H,d.right,b.right,null),null):Th(a.key,a.H,a.left,new Rh(b.key,b.H,d,b.right,null))},Yh=function Yh(a,b,c,d){if(null!=b){var f=function(){var d=b.key;return a.c?a.c(c,d):a.call(null,c,d)}();if(0===f)return d[0]=b,Xh(b.left,b.right);if(0>f)return f=function(){var f=b.left;return Yh.M?Yh.M(a,f,c,d):Yh.call(null,a,f,c,d)}(),null!=f||null!=d[0]?b.left instanceof +Rh?Th(b.key,b.H,f,b.right):new 
Qh(b.key,b.H,f,b.right,null):null;f=function(){var f=b.right;return Yh.M?Yh.M(a,f,c,d):Yh.call(null,a,f,c,d)}();return null!=f||null!=d[0]?b.right instanceof Rh?Uh(b.key,b.H,b.left,f):new Qh(b.key,b.H,b.left,f,null):null}return null},Zh=function Zh(a,b,c,d){var f=b.key,h=a.c?a.c(c,f):a.call(null,c,f);return 0===h?b.replace(f,d,b.left,b.right):0>h?b.replace(f,b.H,function(){var f=b.left;return Zh.M?Zh.M(a,f,c,d):Zh.call(null,a,f,c,d)}(),b.right):b.replace(f,b.H,b.left, +function(){var f=b.right;return Zh.M?Zh.M(a,f,c,d):Zh.call(null,a,f,c,d)}())};function $h(a,b,c,d,e){this.Bb=a;this.mc=b;this.F=c;this.meta=d;this.w=e;this.m=418776847;this.J=8192}g=$h.prototype;g.forEach=function(a){for(var b=E(this),c=null,d=0,e=0;;)if(ed?c.left:c.right}else return null}g.has=function(a){return He(this,a)};g.V=function(a,b){return this.I(null,b,null)}; +g.I=function(a,b,c){a=ai(this,b);return null!=a?a.H:c};g.Qc=function(a,b,c){return null!=this.mc?Jd(Vh(this.mc,b,c)):c};g.P=function(){return this.meta};g.W=function(){return this.F};g.Rc=function(){return 0(a.h?a.h(c):a.call(null,c))?b:c};Ai.A=function(a,b,c,d){return Mb(function(b,c){return Ai.l(a,b,c)},Ai.l(a,b,c),d)};Ai.N=function(a){var b=y(a),c=z(a);a=y(c);var d=z(c);c=y(d);d=z(d);return Ai.A(b,a,c,d)};Ai.L=3;function Bi(a,b){return new kf(null,function(){var c=E(b);if(c){var d=y(c);d=a.h?a.h(d):a.call(null,d);c=t(d)?ae(y(c),Bi(a,vd(c))):null}else c=null;return c},null,null)}function Di(a,b,c){this.i=a;this.end=b;this.step=c} +Di.prototype.ja=function(){return 0this.end};Di.prototype.next=function(){var a=this.i;this.i+=this.step;return a};function Ei(a,b,c,d,e){this.meta=a;this.start=b;this.end=c;this.step=d;this.w=e;this.m=32375006;this.J=139264}g=Ei.prototype;g.toString=function(){return fd(this)};g.equiv=function(a){return this.K(null,a)}; +g.indexOf=function(){var a=null;a=function(a,c){switch(arguments.length){case 1:return Ud(this,a,0);case 2:return Ud(this,a,c)}throw Error("Invalid arity: 
"+(arguments.length-1));};a.h=function(a){return Ud(this,a,0)};a.c=function(a,c){return Ud(this,a,c)};return a}(); +g.lastIndexOf=function(){function a(a){return Xd(this,a,H(this))}var b=null;b=function(b,d){switch(arguments.length){case 1:return a.call(this,b);case 2:return Xd(this,b,d)}throw Error("Invalid arity: "+(arguments.length-1));};b.h=a;b.c=function(a,b){return Xd(this,a,b)};return b}();g.$=function(a,b){if(0<=b&&bthis.end&&0===this.step)return this.start;throw Error("Index out of bounds");}; +g.ka=function(a,b,c){return 0<=b&&bthis.end&&0===this.step?this.start:c};g.ba=function(){return new Di(this.start,this.end,this.step)};g.P=function(){return this.meta};g.Ka=function(){return 0this.end?new Ei(this.meta,this.start+this.step,this.end,this.step,null):null}; +g.W=function(){return wb(this.S(null))?0:Math.ceil((this.end-this.start)/this.step)};g.U=function(){var a=this.w;return null!=a?a:this.w=a=Ad(this)};g.K=function(a,b){return $d(this,b)};g.oa=function(){return tc(wd,this.meta)};g.Fa=function(a,b){return Kd(this,b)};g.Ga=function(a,b,c){for(a=this.start;;)if(0this.end){c=b.c?b.c(c,a):b.call(null,c,a);if(Hd(c))return B(c);a+=this.step}else return c};g.Ia=function(){return null==this.S(null)?null:this.start}; +g.bb=function(){return null!=this.S(null)?new Ei(this.meta,this.start+this.step,this.end,this.step,null):wd};g.S=function(){return 0this.step?this.start>this.end?this:null:this.start===this.end?null:this};g.T=function(a,b){return new Ei(b,this.start,this.end,this.step,this.w)};g.X=function(a,b){return ae(b,this)};Ei.prototype[Fb]=function(){return yd(this)};function Fi(a,b,c){return new Ei(null,a,b,c,null)} +function Gi(a,b){return new R(null,2,5,T,[Bi(a,b),ng(a,b)],null)} +function Hi(a){var b=y;return function(){function c(c,d,e){return new R(null,2,5,T,[b.l?b.l(c,d,e):b.call(null,c,d,e),a.l?a.l(c,d,e):a.call(null,c,d,e)],null)}function d(c,d){return new R(null,2,5,T,[b.c?b.c(c,d):b.call(null,c,d),a.c?a.c(c,d):a.call(null,c,d)],null)}function 
e(c){return new R(null,2,5,T,[b.h?b.h(c):b.call(null,c),a.h?a.h(c):a.call(null,c)],null)}function f(){return new R(null,2,5,T,[b.B?b.B():b.call(null),a.B?a.B():a.call(null)],null)}var h=null,k=function(){function c(a,b,c,e){var f= +null;if(3lb)return Jc(a,"#");Jc(a,c);if(0===tb.h(f))E(h)&&Jc(a,function(){var a=Ki.h(f);return t(a)?a:"..."}());else{if(E(h)){var l=y(h);b.l?b.l(l,a,f):b.call(null,l,a,f)}for(var p=z(h),m=tb.h(f)-1;;)if(!p||null!=m&&0===m){E(p)&&0===m&&(Jc(a,d),Jc(a,function(){var a=Ki.h(f);return t(a)?a:"..."}()));break}else{Jc(a,d);var u=y(p);c=a;h=f;b.l?b.l(u,c,h):b.call(null,u,c,h);var w=z(p);c=m-1;p=w;m=c}}return Jc(a,e)}finally{lb=k}} +function Li(a,b){for(var c=E(b),d=null,e=0,f=0;;)if(fH(a)?a.toUpperCase():[v.h(a.substring(0,1).toUpperCase()),v.h(a.substring(1))].join("")} +function Qo(a){if("string"===typeof a)return a;a=jf(a);var b=Fo(a,/-/),c=E(b);b=y(c);c=z(c);return t(Oo.h?Oo.h(b):Oo.call(null,b))?a:Kb(v,b,ig.c(Po,c))}function Ro(a){var b=function(){var b=function(){var b=me(a);return b?(b=a.displayName,t(b)?b:a.name):b}();if(t(b))return b;b=function(){var b=null!=a?a.J&4096||q===a.Oe?!0:!1:!1;return b?jf(a):b}();if(t(b))return b;b=qe(a);return xe(b)?Tk.h(b):null}();return Do(""+v.h(b),"$",".")}var So=!1;if("undefined"===typeof To)var To=0;function Uo(a){return setTimeout(a,16)}var Vo="undefined"===typeof window||null==window.document?Uo:function(){var a=window,b=a.requestAnimationFrame;if(t(b))return b;b=a.webkitRequestAnimationFrame;if(t(b))return b;b=a.mozRequestAnimationFrame;if(t(b))return b;a=a.msRequestAnimationFrame;return t(a)?a:Uo}();function Wo(a,b){return a.cljsMountOrder-b.cljsMountOrder}if("undefined"===typeof Xo)var Xo=function(){return null};function Yo(a){this.Yd=a} +function Zo(a,b){var c=a[b];if(null==c)return null;a[b]=null;for(var d=c.length,e=0;;)if(e=d&&a.push(gq(c));return a}}(e),[b,c],a))}};if("undefined"===typeof jq)var jq=null;function kq(){if(null!=jq)return jq;if("undefined"!==typeof ReactDOM)return 
jq=ReactDOM;if("undefined"!==typeof require){var a=jq=require("react-dom");if(t(a))return a;throw Error("require('react-dom') failed");}throw Error("js/ReactDOM is missing");}if("undefined"===typeof lq)var lq=dg.h(Ef); +function mq(a,b,c){var d=So;So=!0;try{return kq().render(a.B?a.B():a.call(null),b,function(){return function(){var d=So;So=!1;try{return gg.M(lq,K,b,new R(null,2,5,T,[a,b],null)),Zo(bp,"afterRender"),null!=c?c.B?c.B():c.call(null):null}finally{So=d}}}(d))}finally{So=d}}function nq(a,b){return mq(a,b,null)}function oq(a,b,c){qp();return mq(function(){return gq(me(a)?a.B?a.B():a.call(null):a)},b,c)}Wp=function(a){return kq().findDOMNode(a)};function pq(a){switch(arguments.length){case 2:return oq(arguments[0],arguments[1],null);case 3:return oq(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",v.h(arguments.length)].join(""));}}function qq(a,b){return oq(a,b,null)} +da("reagent.core.force_update_all",function(){qp();qp();for(var a=E(mh(B(lq))),b=null,c=0,d=0;;)if(d=Number(c)?a:a=-1Number(a)?"-":0<=b.indexOf("+")?"+":0<=b.indexOf(" ")?" 
":"";0<=Number(a)&&(d=f+d);if(isNaN(c)||d.length>=Number(c))return d;d=isNaN(e)?Math.abs(Number(a)).toString():Math.abs(Number(a)).toFixed(e);a=Number(c)-d.length-f.length;0<=b.indexOf("-",0)?d=f+d+sa(" ",a):(b=0<=b.indexOf("0",0)?"0":" ",d=f+sa(b,a)+d);return d};yq.fc.d=function(a,b,c,d,e,f,h,k){return yq.fc.f(parseInt(a,10),b,c,d,0,f,h,k)}; +yq.fc.i=yq.fc.d;yq.fc.u=yq.fc.d;function zq(a){var b=be([Vk,null]);return wg.c(t(a)?a:Ef,function(){return function e(a){return new kf(null,function(){for(var b=a;;)if(b=E(b)){if(Ae(b)){var d=Wc(b),k=H(d),l=of(k);a:for(var p=0;;)if(p=H(h)&&Vf(function(){return function(a){return!(a instanceof Xq)}}(b,c,d,e,f,h),h)))throw Error(Bq("%s is not a valid sequence schema; %s%s%s",be([a,"a valid sequence schema consists of zero or more `one` elements, ","followed by zero or more `optional` elements, followed by an optional ", +"schema that will match the remaining elements."])));return new R(null,2,5,T,[O.c(c,f),y(h)],null)} +R.prototype.xb=function(){var a=this,b=Zq(a),c=J(b,0,null),d=J(b,1,null);return Wg(O.c(function(){return function(a,b,c,d){return function m(e){return new kf(null,function(){return function(){for(;;){var a=E(e);if(a){if(Ae(a)){var b=Wc(a),c=H(b),d=of(c);return function(){for(var a=0;;)if(ac?f:c;return $r(a,ea?0:a}():function(){var a=e-b;return f>a?f:a}())} +function gs(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a,d=D.c(c,pl);d=null!=d&&(d.m&64||q===d.G)?P(U,d):d;var e=D.c(d,Aj),f=D.c(c,Yj),h=D.c(c,no);return $r(c,e>f?function(){var a=h-1,c=e+b;return a=a}}(l,p,a,c,c,d,e,f,h,k),h),l,p);return Zr(c,d)} +function it(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a,d=D.c(c,pl),e=null!=d&&(d.m&64||q===d.G)?P(U,d):d,f=D.c(e,zn),h=D.c(c,tk),k=D.c(c,fl),l=b-1;d=J(cf(Bi(function(a,b,c,d,e,f,h){return function(a){return h>a}}(l,a,c,c,d,e,f,h,k),h)),l,0);return Zr(c,d)}function jt(a){return K.l(a,im,Ve)}function kt(a){return K.l(a,im,Hr)}function lt(a,b,c){return K.l(a,b,c)}function mt(a,b,c){return Wg(O.A(jg(b,a),new 
R(null,1,5,T,[c],null),be([jg(H(a)-b-1,kg(b,a))])))} +function nt(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a,d=D.c(c,pl),e=null!=d&&(d.m&64||q===d.G)?P(U,d):d;d=D.c(e,zn);e=D.c(e,Aj);var f=D.c(c,fl);D.c(c,no);var h=D.c(c,Oj),k=D.c(c,Rj),l=D.c(c,$l),p=D.c(c,im);p=95b?p.h?p.h(b):p.call(null,b):b;h=tr(p,h);return G.c(f,d+1)?t(k)?K.l(Yr(zg(c,new R(null,3,5,T,[il,e,d],null),h),d+1),vk,!0):zg(c,new R(null,3,5,T,[il,e,d],null),h):Yr(Ag.Z(c,new R(null,2,5,T,[il,e],null),t(l)?mt:lt,d,h),d+1)} +function ot(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a,d=D.c(c,Rj),e=D.c(c,vk);t(t(d)?e:d)&&(c=null!=c&&(c.m&64||q===c.G)?P(U,c):c,d=D.c(c,pl),d=null!=d&&(d.m&64||q===d.G)?P(U,d):d,d=D.c(d,Aj),e=D.c(c,no),c=Yr(c,0),c=G.c(e,d+1)?Tr.h(c):$r(c,d+1));return c=nt(c,b)}function pt(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;var b=D.c(a,fl),c=D.c(a,no);return K.l(a,il,Wg(qg(c,Wg(qg(b,new R(null,2,5,T,[69,Ef],null))))))} +function qt(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;var b=D.c(a,pl);b=null!=b&&(b.m&64||q===b.G)?P(U,b):b;b=D.c(b,Aj);var c=D.c(a,fl),d=D.c(a,Oj);return zg(a,new R(null,2,5,T,[il,b],null),gr.c(c,d))}function rt(a,b,c){return Wg(O.c(jg(b,a),qg(H(a)-b,vr(c))))}function st(a,b,c){return Wg(O.c(qg(b+1,vr(c)),kg(b+1,a)))} +function tt(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;var b=D.c(a,pl),c=null!=b&&(b.m&64||q===b.G)?P(U,b):b;b=D.c(c,zn);c=D.c(c,Aj);var d=D.c(a,fl),e=D.c(a,Oj);--d;return Ag.Z(a,new R(null,2,5,T,[il,c],null),rt,b=k?Zr(c,k-1):c,m=Mb(D,p,new R(null,2,5,T,[pl,zn],null));return Ag.l(p,new R(null,2,5,T,[il,h],null),function(a,b,c,d,e,f,h,k,m,l,p,Q){return function(a){return Wg(O.A(jg(b,a),kg(b+c,a),be([qg(c,vr(Q))])))}}(p,m,function(){var a=k-m;return b=a}}(c,b)(b)}()))return Gu(a,b+64);throw Jt;}catch(h){if(h instanceof Error){var d=h;if(d===Jt)try{if(55===b)return Bg(a,V,ms);throw Jt;}catch(k){if(k instanceof Error){var e=k;if(e===Jt)try{if(56===b)return Bg(a,V,ns);throw Jt;}catch(l){if(l instanceof Error){var f=l;if(f===Jt)try{if(99===b)return du(a); +throw 
Jt;}catch(p){if(p instanceof Error){d=p;if(d===Jt)throw Jt;throw d;}throw p;}else throw f;}else throw l;}else throw e;}else throw k;}else throw d;}else throw h;}else throw Jt;}catch(h){if(h instanceof Error)if(d=h,d===Jt)try{if(35===c)try{if(56===b)return Bg(a,V,pt);throw Jt;}catch(k){if(k instanceof Error){e=k;if(e===Jt)throw Jt;throw e;}throw k;}else throw Jt;}catch(k){if(k instanceof Error)if(e=k,e===Jt)try{if(40===c)try{if(48===b)return Zt(a);throw Jt;}catch(l){if(l instanceof Error){f= +l;if(f===Jt)return $t(a);throw f;}throw l;}else throw Jt;}catch(l){if(l instanceof Error){f=l;if(f===Jt)return a;throw f;}throw l;}else throw e;else throw k;}else throw d;else throw h;}},function(a){return a},function(a){return a},Gu,function(a,b){return Cg(a,V,ot,b)},function(a,b){var c=function(){switch(b){case 64:return eu;case 65:return fu;case 66:return gu;case 67:return hu;case 68:return iu;case 69:return ju;case 70:return ku;case 71:return lu;case 72:return mu;case 73:return nu;case 74:return ou; +case 75:return pu;case 76:return su;case 77:return tu;case 80:return uu;case 83:return qu;case 84:return ru;case 87:return vu;case 88:return wu;case 90:return xu;case 96:return lu;case 97:return hu;case 100:return Du;case 101:return fu;case 102:return mu;case 103:return yu;case 104:return zu;case 108:return Au;case 109:return Cu;case 112:return Eu;case 114:return Fu;default:return null}}();return t(c)?c.h?c.h(a):c.call(null,a):a},function(a){return a},function(a,b){return K.l(a,kk,ge.c(kk.h(a),b))}, +function(a){return a},function(a,b){return K.l(a,rk,ge.c(rk.h(a),b))},function(a){return a},function(a){return a},function(a){return K.A(a,rk,he,be([kk,he]))}]);function Iu(a,b){for(var c=a,d=Tl.h(c),e=b;;){var f=y(e);if(t(f)){var h=160<=f?65:f;h=D.c(d.h?d.h(xq):d.call(null,xq),h);d=J(h,0,null);h=J(h,1,null);a:for(;;)if(E(h)){var k=y(h);k=Hu.h?Hu.h(k):Hu.call(null,k);c=k.c?k.c(c,f):k.call(null,c,f);h=z(h)}else break a;e=vd(e)}else return K.l(c,Tl,d)}} +function Ju(a,b){var 
c=xg(function(a){return a.codePointAt(0)},b);return Iu(a,c)} +function Ku(a,b){try{if(ze(b)&&3===H(b)){var c=Vd(b,0),d=Vd(b,1),e=Vd(b,2);return[v.h(a+8),";2;",v.h(c),";",v.h(d),";",v.h(e)].join("")}throw Jt;}catch(k){if(k instanceof Error){var f=k;if(f===Jt)try{if(t(function(){return function(){return function(a){return 8>a}}(f)(b)}()))return""+v.h(a+b);throw Jt;}catch(l){if(l instanceof Error){var h=l;if(h===Jt)try{if(t(function(){return function(){return function(a){return 16>a}}(h,f)(b)}()))return""+v.h(a+52+b);throw Jt;}catch(p){if(p instanceof Error){c= +p;if(c===Jt)return[v.h(a+8),";5;",v.h(b)].join("");throw c;}throw p;}else throw h;}else throw l;}else throw f;}else throw k;}}ag.c(Ku,30);ag.c(Ku,40);var Lu=function Lu(a){if(null!=a&&null!=a.yd)return a.yd(a);var c=Lu[n(null==a?null:a)];if(null!=c)return c.h?c.h(a):c.call(null,a);c=Lu._;if(null!=c)return c.h?c.h(a):c.call(null,a);throw Cb("Screen.lines",a);},Mu=function Mu(a){if(null!=a&&null!=a.xd)return a.xd(a);var c=Mu[n(null==a?null:a)];if(null!=c)return c.h?c.h(a):c.call(null,a);c=Mu._;if(null!=c)return c.h?c.h(a):c.call(null,a);throw Cb("Screen.cursor",a);};function Nu(a,b){var c=0parseFloat(Iv)){Hv=String(Kv);break a}}Hv=Iv}var gb={}; +function Lv(a){return fb(a,function(){for(var b=0,c=ra(String(Hv)).split("."),d=ra(String(a)).split("."),e=Math.max(c.length,d.length),f=0;0==b&&f=a.keyCode)a.keyCode=-1}catch(b){}};var Uv="closure_listenable_"+(1E6*Math.random()|0),Vv=0;function Wv(a,b,c,d,e){this.listener=a;this.Xd=null;this.src=b;this.type=c;this.capture=!!d;this.Ub=e;this.key=++Vv;this.$c=this.Fd=!1}function Xv(a){a.$c=!0;a.listener=null;a.Xd=null;a.src=null;a.Ub=null};function Yv(a){this.src=a;this.rb={};this.wd=0}Yv.prototype.add=function(a,b,c,d,e){var f=a.toString();a=this.rb[f];a||(a=this.rb[f]=[],this.wd++);var h=Zv(a,b,d,e);-1e.keyCode||void 0!=e.returnValue)){a:{var f=!1;if(0==e.keyCode)try{e.keyCode=-1;break a}catch(l){f=!0}if(f||void 
0==e.returnValue)e.returnValue=!0}e=[];for(f=c.currentTarget;f;f=f.parentNode)e.push(f);f=a.type;for(var h=e.length-1;!c.Kc&&0<=h;h--){c.currentTarget=e[h];var k=nw(e[h],f,!0,c);d=d&&k}for(h=0;!c.Kc&& +h>>0);function fw(a){if(ha(a))return a;a[pw]||(a[pw]=function(b){return a.handleEvent(b)});return a[pw]};function qw(){wv.call(this);this.Ib=new Yv(this);this.ff=this;this.ve=null}qa(qw,wv);qw.prototype[Uv]=!0;g=qw.prototype;g.addEventListener=function(a,b,c,d){dw(this,a,b,c,d)};g.removeEventListener=function(a,b,c,d){lw(this,a,b,c,d)}; +g.dispatchEvent=function(a){var b,c=this.ve;if(c)for(b=[];c;c=c.ve)b.push(c);c=this.ff;var d=a.type||a;if(ca(a))a=new Sv(a,c);else if(a instanceof Sv)a.target=a.target||c;else{var e=a;a=new Sv(d,c);Ia(a,e)}e=!0;if(b)for(var f=b.length-1;!a.Kc&&0<=f;f--){var h=a.currentTarget=b[f];e=rw(h,d,!0,a)&&e}a.Kc||(h=a.currentTarget=c,e=rw(h,d,!0,a)&&e,a.Kc||(e=rw(h,d,!1,a)&&e));if(b)for(f=0;!a.Kc&&fthis.head?(Yw(this.o,this.fa,a,0,this.o.length-this.fa),Yw(this.o,0,a,this.o.length-this.fa,this.head),this.fa=0,this.head=this.length,this.o=a):this.fa===this.head?(this.head=this.fa=0,this.o=a):null};function ax(a,b){for(var c=a.length,d=0;;)if(da)){a+=1;continue}break}hx=!1;return 0c)return a;a:for(;;){var e=cMath.random()&&15>d)d+=1;else break a;if(d>this.level){for(var e=this.level+1;;)if(e<=d+1)c[e]=this.header,e+=1;else break;this.level=d}for(d=Ex(a,b,Array(d));;)return 0<=this.level?(c=c[0].forward,d.forward[0]=c[0],c[0]=d):null}; +Gx.prototype.remove=function(a){var b=Array(15),c=Fx(this.header,a,this.level,b);c=0===c.forward.length?null:c.forward[0];if(null!=c&&c.key===a){for(a=0;;)if(a<=this.level){var d=b[a].forward;c===(ad)return c===b.header?null:c;var e;a:for(e=c;;){e=d=a)break a}null!=e?(--d,c=e):--d}}Gx.prototype.S=function(){return function(a){return function d(c){return new kf(null,function(){return function(){return null==c?null:ae(new 
R(null,2,5,T,[c.key,c.H],null),d(c.forward[0]))}}(a),null,null)}}(this)(this.header.forward[0])}; +Gx.prototype.R=function(a,b,c){return Y(b,function(){return function(a){return Y(b,Qi,""," ","",c,a)}}(this),"{",", ","}",c,this)};var Ix=new Gx(Ex(null,null,0),0);function Jx(a){var b=(new Date).valueOf()+a,c=Hx(b),d=t(t(c)?c.keya:b)?a+8:a,[v.h(c),v.h(a)].join("")):null} +function Vy(a){var b=J(a,0,null),c=J(a,1,null);a=J(a,2,null);return["rgb(",v.h(b),",",v.h(c),",",v.h(a),")"].join("")} +var Wy=hj(function(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;var b=D.c(a,Nk),c=D.c(a,pl);a=K.l(a,Nk,t(c)?wb(b):b);var d=null!=a&&(a.m&64||q===a.G)?P(U,a):a,e=D.c(d,Ok),f=D.c(d,Tn);b=D.c(d,Kj);var h=D.c(d,dk);c=D.c(d,Vl);var k=D.c(d,Nk),l=D.c(d,Yn);d=D.c(d,pl);var p=t(k)?t(e)?e:"fg":f;e=Uy(t(k)?t(f)?f:"bg":e,b,"fg-");h=Uy(p,h,"bg-");c=vg(ub,new R(null,6,5,T,[e,h,t(b)?"bright":null,t(l)?"italic":null,t(c)?"underline":null,t(d)?"cursor":null],null));if(E(c))a:for(b=new cb,c=E(c);;)if(null!=c)b.append(""+ +v.h(y(c))),c=z(c),null!=c&&b.append(" ");else{b=b.toString();break a}else b=null;l=null!=a&&(a.m&64||q===a.G)?P(U,a):a;a=D.c(l,Ok);c=D.c(l,Tn);h=D.c(l,Nk);l=t(h)?c:a;a=t(h)?a:c;a=hi.A(be([t(ze.h?ze.h(l):ze.call(null,l))?new r(null,1,[ik,Vy(l)],null):null,t(ze.h?ze.h(a):ze.call(null,a))?new r(null,1,[al,Vy(a)],null):null]));return hi.A(be([t(b)?new r(null,1,[vn,b],null):null,t(a)?new r(null,1,[fm,a],null):null]))}); +function Xy(a,b){var c=J(a,0,null),d=J(a,1,null);d=Bg(d,pl,function(){return function(a){return t(a)?B(b):a}}(a,c,d));return new R(null,3,5,T,[ro,Wy.h?Wy.h(d):Wy.call(null,d),c],null)}function Yy(a,b){var c=J(a,0,null),d=J(a,1,null),e=jg(b,c);e=E(e)?new R(null,2,5,T,[Eo(e),d],null):null;var f=K.l(d,pl,!0);f=new R(null,2,5,T,[Vd(c,b),f],null);c=kg(b+1,c);d=E(c)?new R(null,2,5,T,[Eo(c),d],null):null;return vg(ub,new R(null,3,5,T,[e,f,d],null))} +function Zy(a,b){for(var c=he,d=a,e=b;;)if(E(d)){var f=y(d),h=J(f,0,null);J(f,1,null);h=H(h);if(h<=e)c=ge.c(c,f),d=vd(d),e-=h;else 
return O.A(c,Yy(f,e),be([vd(d)]))}else return c}function $y(a,b,c){a=t(B(b))?Zy(B(a),B(b)):B(a);return new R(null,2,5,T,[Lm,Ii(bg(function(){return function(a,b){return pe(new R(null,3,5,T,[Xy,b,c],null),new r(null,1,[mk,a],null))}}(a),a))],null)}var qA=new ti(null,new r(null,3,["small",null,"medium",null,"big",null],null),null); +function rA(a,b,c,d,e){var f=yp(function(){var a=B(c);return t(qA.h?qA.h(a):qA.call(null,a))?["font-",v.h(a)].join(""):null}),h=yp(function(){return function(){var d=B(a),e=B(b),f=B(c);f=t(qA.h?qA.h(f):qA.call(null,f))?null:new r(null,1,[wk,f],null);return hi.A(be([new r(null,2,[fl,[v.h(d),"ch"].join(""),no,[v.h(1.3333333333*e),"em"].join("")],null),f]))}}(f)),k=yp(function(){return function(){return Lu(B(d))}}(f,h)),l=yp(function(a,c,d){return function(){return xg(function(a,b,c){return function(d){return yp(function(a, +b,c){return function(){return D.c(B(c),d)}}(a,b,c))}}(a,c,d),Fi(0,B(b),1))}}(f,h,k)),p=yp(function(){return function(){return Mu(B(d))}}(f,h,k,l)),m=yp(function(a,b,c,d,e){return function(){return zn.h(B(e))}}(f,h,k,l,p)),u=yp(function(a,b,c,d,e){return function(){return Aj.h(B(e))}}(f,h,k,l,p,m)),w=yp(function(a,b,c,d,e){return function(){return On.h(B(e))}}(f,h,k,l,p,m,u));return function(a,b,c,d,f,h,k,l){return function(){return new R(null,3,5,T,[Gm,new r(null,2,[vn,B(a),fm,B(b)],null),bg(function(a, +b,c,d,f,h,k,l){return function(m,p){var u=yp(function(a,b,c,d,e,f,h,k){return function(){var a=B(k);return t(a)?(a=G.c(m,B(h)))?B(f):a:a}}(a,b,c,d,f,h,k,l));return pe(new R(null,4,5,T,[$y,p,u,e],null),new r(null,1,[mk,m],null))}}(a,b,c,d,f,h,k,l),B(d))],null)}}(f,h,k,l,p,m,u,w)} +function sA(){return new R(null,2,5,T,[Ym,new r(null,4,[Mn,"1.1",Fl,"0 0 866.0254037844387 866.0254037844387",vn,"icon",mo,new r(null,1,[An,'\x3cdefs\x3e \x3cmask id\x3d"small-triangle-mask"\x3e \x3crect width\x3d"100%" height\x3d"100%" fill\x3d"white"/\x3e \x3cpolygon points\x3d"508.01270189221935 433.01270189221935, 208.0127018922194 
259.8076211353316, 208.01270189221927 606.217782649107" fill\x3d"black"\x3e\x3c/polygon\x3e \x3c/mask\x3e \x3c/defs\x3e \x3cpolygon points\x3d"808.0127018922194 433.01270189221935, 58.01270189221947 -1.1368683772161603e-13, 58.01270189221913 866.0254037844386" mask\x3d"url(#small-triangle-mask)" fill\x3d"white"\x3e\x3c/polygon\x3e \x3cpolyline points\x3d"481.2177826491071 333.0127018922194, 134.80762113533166 533.0127018922194" stroke\x3d"white" stroke-width\x3d"90"\x3e\x3c/polyline\x3e'],null)], +null)],null)}function tA(){return new R(null,3,5,T,[Ym,new r(null,3,[Mn,"1.1",Fl,"0 0 12 12",vn,"icon"],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M1,0 L11,6 L1,12 Z"],null)],null)],null)}function uA(){return new R(null,4,5,T,[Ym,new r(null,3,[Mn,"1.1",Fl,"0 0 12 12",vn,"icon"],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M1,0 L4,0 L4,12 L1,12 Z"],null)],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M8,0 L11,0 L11,12 L8,12 Z"],null)],null)],null)} +function vA(){return new R(null,4,5,T,[Ym,new r(null,3,[Mn,"1.1",Fl,"0 0 12 12",vn,"icon"],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M12,0 L7,0 L9,2 L7,4 L8,5 L10,3 L12,5 Z"],null)],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M0,12 L0,7 L2,9 L4,7 L5,8 L3,10 L5,12 Z"],null)],null)],null)} +function wA(){return new R(null,4,5,T,[Ym,new r(null,3,[Mn,"1.1",Fl,"0 0 12 12",vn,"icon"],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M7,5 L7,0 L9,2 L11,0 L12,1 L10,3 L12,5 Z"],null)],null),new R(null,2,5,T,[Fj,new r(null,1,[pn,"M5,7 L0,7 L2,9 L0,11 L1,12 L3,10 L5,12 Z"],null)],null)],null)}function xA(a,b){return function(b){return function(){return new R(null,3,5,T,[cl,new r(null,1,[Sl,b],null),new R(null,1,5,T,[t(B(a))?uA:tA],null)],null)}}(Ty(b,new fy(null,null,null)))} +function yA(a){return 10>a?["0",v.h(a)].join(""):a}function zA(a){var b=Math.floor((a%60+60)%60);return[v.h(yA(Math.floor(a/60))),":",v.h(yA(b))].join("")}function AA(a,b){var c=T,d=new R(null,2,5,T,[Yk,zA(B(a))],null),e=T;var f=B(a);var 
h=B(b);f=["-",v.h(zA(h-f))].join("");return new R(null,3,5,c,[Ml,d,new R(null,2,5,e,[co,f],null)],null)} +function BA(){function a(a){a.preventDefault();return Ry(a.currentTarget.parentNode.parentNode.parentNode)}return function(){return new R(null,4,5,T,[un,new r(null,1,[Sl,a],null),new R(null,1,5,T,[vA],null),new R(null,1,5,T,[wA],null)],null)}} +function CA(a,b){var c=Sy(b,function(a){var b=a.currentTarget.offsetWidth,c=a.currentTarget.getBoundingClientRect();return cy(Nu(a.clientX-c.left,b)/b)}),d=yp(function(){return function(){return[v.h(100*B(a)),"%"].join("")}}(c));return function(a,b){return function(){return new R(null,2,5,T,[Vj,new R(null,3,5,T,[Bl,new r(null,1,[Ql,a],null),new R(null,2,5,T,[Cj,new R(null,2,5,T,[ro,new r(null,1,[fm,new r(null,1,[fl,B(b)],null)],null)],null)],null)],null)],null)}}(c,d)} +function DA(a,b,c,d){return function(e){return function(){return new R(null,5,5,T,[Kk,new R(null,3,5,T,[xA,a,d],null),new R(null,3,5,T,[AA,b,c],null),new R(null,1,5,T,[BA],null),new R(null,3,5,T,[CA,e,d],null)],null)}}(yp(function(){return B(b)/B(c)}))} +function EA(a){return function(a){return function(){return new R(null,3,5,T,[ol,new r(null,1,[Sl,a],null),new R(null,2,5,T,[Xk,new R(null,2,5,T,[km,new R(null,2,5,T,[ro,new R(null,1,5,T,[sA],null)],null)],null)],null)],null)}}(Ty(a,new fy(null,null,null)))}function FA(){return new R(null,2,5,T,[Ek,new R(null,1,5,T,[xn],null)],null)}function GA(a){return Wf(function(b){return a[b]},new R(null,4,5,T,["altKey","shiftKey","metaKey","ctrlKey"],null))} +function HA(a){var b=t(GA(a))?null:function(){switch(a.key){case " ":return new fy(null,null,null);case "f":return bm;case "0":return cy(0);case "1":return cy(.1);case "2":return cy(.2);case "3":return cy(.3);case "4":return cy(.4);case "5":return cy(.5);case "6":return cy(.6);case "7":return cy(.7);case "8":return cy(.8);case "9":return cy(.9);default:return null}}();if(t(b))return b;switch(a.key){case "\x3e":return new ey(null,null,null);case "\x3c":return new 
dy(null,null,null);default:return null}} +function IA(a){if(t(GA(a)))return null;switch(a.which){case 37:return new ay(null,null,null);case 39:return new $x(null,null,null);default:return null}}function JA(a){var b=HA(a);return t(b)?(a.preventDefault(),G.c(b,bm)?(Ry(a.currentTarget),null):b):null}function KA(a){var b=IA(a);return t(b)?(a.preventDefault(),b):null} +function LA(a,b,c,d){a=t(a)?['"',v.h(a),'"'].join(""):"untitled";return new R(null,4,5,T,[dl,t(d)?new R(null,2,5,T,[jo,new r(null,1,[zl,d],null)],null):null,a,t(b)?new R(null,3,5,T,[ro," by ",t(c)?new R(null,3,5,T,[lo,new r(null,1,[ho,c],null),b],null):b],null):null],null)} +function MA(a){var b=Mx(1,ig.h(iy)),c=Kx(1);lx(function(c){return function(){var d=function(){return function(a){return function(){function b(b){for(;;){a:try{for(;;){var c=a(b);if(!N(c,Z)){var d=c;break a}}}catch(x){if(x instanceof Object)b[5]=x,Cx(b),d=Z;else throw x;}if(!N(d,Z))return d}}function c(){var a=[null,null,null,null,null,null,null,null,null,null,null,null];a[0]=d;a[1]=1;return a}var d=null;d=function(a){switch(arguments.length){case 0:return c.call(this);case 1:return b.call(this,a)}throw Error("Invalid arity: "+ +(arguments.length-1));};d.B=c;d.h=b;return d}()}(function(){return function(c){var d=c[1];if(7===d)return c[7]=c[2],Ax(c,12,b,!1);if(1===d)return c[2]=null,c[1]=2,Z;if(4===d)return c[8]=c[2],Ax(c,5,b,!0);if(6===d)return d=Jx(3E3),Ux(c,8,new R(null,2,5,T,[a,d],null));if(3===d)return Bx(c,c[2]);if(12===d)return c[9]=c[2],c[2]=null,c[1]=2,Z;if(2===d)return zx(c,4,a);if(11===d)return c[2]=c[2],c[1]=7,Z;if(9===d)return c[2]=null,c[1]=6,Z;if(5===d)return c[10]=c[2],c[2]=null,c[1]=6,Z;if(10===d)return c[2]= +null,c[1]=11,Z;if(8===d){var e=c[2];d=J(e,0,null);e=J(e,1,null);e=G.c(e,a);c[11]=d;c[1]=e?9:10;return Z}return null}}(c),c)}(),f=function(){var a=d.B?d.B():d.call(null);a[6]=c;return a}();return yx(f)}}(c));return b} +function NA(a,b){var c=dg.h(b),d=Kx(1);lx(function(b,c){return function(){var d=function(){return 
function(a){return function(){function b(b){for(;;){a:try{for(;;){var c=a(b);if(!N(c,Z)){var d=c;break a}}}catch(F){if(F instanceof Object)b[5]=F,Cx(b),d=Z;else throw F;}if(!N(d,Z))return d}}function c(){var a=[null,null,null,null,null,null,null,null,null,null,null,null,null];a[0]=d;a[1]=1;return a}var d=null;d=function(a){switch(arguments.length){case 0:return c.call(this);case 1:return b.call(this, +a)}throw Error("Invalid arity: "+(arguments.length-1));};d.B=c;d.h=b;return d}()}(function(b,c){return function(d){var e=d[1];if(7===e){var f=d[7],h=wb(null==f);d[8]=d[2];d[1]=h?8:9;return Z}if(20===e)return f=d[7],d[1]=t(q===f.Fe)?23:24,Z;if(27===e)return d[2]=!1,d[1]=28,Z;if(1===e)return d[2]=null,d[1]=2,Z;if(24===e)return f=d[7],d[1]=t(!f.Tc)?26:27,Z;if(4===e){f=d[7];var k=d[9];h=d[2];var l=J(h,0,null),m=J(h,1,null);d[10]=m;d[7]=l;d[9]=h;d[1]=t(null==l)?5:6;return Z}return 15===e?(d[2]=!1,d[1]= +16,Z):21===e?(f=d[7],h=Ab(Yx,f),d[2]=h,d[1]=22,Z):31===e?(d[11]=d[2],d[2]=null,d[1]=2,Z):13===e?(d[2]=d[2],d[1]=10,Z):22===e?(d[1]=t(d[2])?29:30,Z):29===e?(f=d[7],h=B(a),h=Zx(f,h),h=gg.l(c,wo,h),d[2]=h,d[1]=31,Z):6===e?(d[2]=null,d[1]=7,Z):28===e?(d[2]=d[2],d[1]=25,Z):25===e?(d[2]=d[2],d[1]=22,Z):17===e?(m=d[10],f=d[7],k=d[9],h=gg.c(a,function(){return function(a,b){return function(a){return Xx(b,a)}}(k,f,m,m,f,k,e,b,c)}()),d[2]=h,d[1]=19,Z):3===e?Bx(d,d[2]):12===e?(f=d[7],d[1]=t(!f.Tc)?14:15,Z): +2===e?(h=B(c),h=E(h),Ux(d,4,h)):23===e?(d[2]=!0,d[1]=25,Z):19===e?(f=d[7],h=wb(null==f),d[12]=d[2],d[1]=h?20:21,Z):11===e?(d[2]=!0,d[1]=13,Z):9===e?(f=d[7],h=Ab(Wx,f),d[2]=h,d[1]=10,Z):5===e?(m=d[10],h=gg.l(c,re,m),d[2]=h,d[1]=7,Z):14===e?(f=d[7],h=Ab(Wx,f),d[2]=h,d[1]=16,Z):26===e?(f=d[7],h=Ab(Yx,f),d[2]=h,d[1]=28,Z):16===e?(d[2]=d[2],d[1]=13,Z):30===e?(d[2]=null,d[1]=31,Z):10===e?(d[1]=t(d[2])?17:18,Z):18===e?(d[2]=null,d[1]=19,Z):8===e?(f=d[7],d[1]=t(q===f.sb)?11:12,Z):null}}(b,c),b,c)}(),e=function(){var a= +d.B?d.B():d.call(null);a[6]=b;return a}();return yx(e)}}(d,c));return 
d} +function OA(a,b,c){c=Ty(c,!0);var d=Sy(b,JA),e=Sy(b,KA),f=yp(function(){return function(){return Hm.h(B(a))}}(c,d,e)),h=yp(function(){return function(){return el.h(B(a))}}(c,d,e,f)),k=yp(function(a,b,c,d,e){return function(){var a=B(d);return t(a)?a:B(e)}}(c,d,e,f,h)),l=yp(function(b,c,d,e,f,h){return function(){var b=Gk.h(B(a));b=t(b)?b:wb(B(h));return t(b)?"hud":null}}(c,d,e,f,h,k)),p=yp(function(){return function(){return["asciinema-theme-",v.h(gm.h(B(a)))].join("")}}(c,d,e,f,h,k,l)),m=yp(function(){return function(){var b= +fl.h(B(a));return t(b)?b:80}}(c,d,e,f,h,k,l,p)),u=yp(function(){return function(){var b=no.h(B(a));return t(b)?b:24}}(c,d,e,f,h,k,l,p,m)),w=yp(function(){return function(){return wk.h(B(a))}}(c,d,e,f,h,k,l,p,m,u)),x=yp(function(){return function(){return V.h(B(a))}}(c,d,e,f,h,k,l,p,m,u,w)),C=yp(function(){return function(){return ml.h(B(a))}}(c,d,e,f,h,k,l,p,m,u,w,x)),F=yp(function(){return function(){return jn.h(B(a))}}(c,d,e,f,h,k,l,p,m,u,w,x,C)),I=yp(function(){return function(){return Uj.h(B(a))}}(c, +d,e,f,h,k,l,p,m,u,w,x,C,F)),M=yp(function(){return function(){return wl.h(B(a))}}(c,d,e,f,h,k,l,p,m,u,w,x,C,F,I)),S=B(a),X=null!=S&&(S.m&64||q===S.G)?P(U,S):S,Ga=D.c(X,ki),db=D.c(X,li),Q=D.c(X,mi),xb=D.c(X,ni);return function(a,c,d,e,f,h,k,l,m,p,u,w,x,C,F,I,M,S,Q,X,Ga,db){return function(){return new R(null,3,5,T,[Cn,new r(null,5,[Jj,-1,Zj,c,Rn,d,Vm,a,vn,B(k)],null),new R(null,7,5,T,[Sm,new r(null,1,[vn,B(l)],null),new R(null,6,5,T,[rA,m,p,u,w,x],null),new R(null,5,5,T,[DA,C,F,I,b],null),t(t(Q)?Q: +X)?new R(null,5,5,T,[LA,Q,X,Ga,db],null):null,t(B(h))?null:new R(null,2,5,T,[EA,b],null),t(B(e))?new R(null,1,5,T,[FA],null):null],null)],null)}}(c,d,e,f,h,k,l,p,m,u,w,x,C,F,I,M,S,X,Ga,db,Q,xb)} +function PA(a){var b=Kx(null),c=Kx(new dx(bx(1),1));return function(b,c){return function(){return Pp(new r(null,4,[ln,"asciinema-player",Dm,function(b,c){return function(){return OA(a,b,c)}}(b,c),$k,function(b,c){return function(){var 
d=ty(Gl.h(B(a))),e=MA(c);Tx(e,b);return NA(a,Je([b,d]))}}(b,c),Wm,function(){return function(){return uy(Gl.h(B(a)))}}(b,c)],null))}}(b,c)};function QA(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,Ak),e=D.c(c,Gl);d=a.h?a.h(d):a.call(null,d);zy(e,d);return K.l(c,Ak,d)}$x.prototype.sb=q;$x.prototype.qb=function(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,Uj),e=D.c(c,wl),f=D.c(c,Gl);t(e)&&yy(f,Nu(d+5,e));return c};ay.prototype.sb=q;ay.prototype.qb=function(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,Uj),e=D.c(c,wl),f=D.c(c,Gl);t(e)&&yy(f,Nu(d+-5,e));return c};by.prototype.sb=q; +by.prototype.qb=function(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,wl),e=D.c(c,Gl);t(d)&&(d*=nn.h(this),yy(e,d));return c};dy.prototype.sb=q;dy.prototype.qb=function(a,b){return QA(function(){return function(a){return a/2}}(this),b)};ey.prototype.sb=q;ey.prototype.qb=function(a,b){return QA(function(){return function(a){return 2*a}}(this),b)};fy.prototype.sb=q;fy.prototype.qb=function(a,b){xy(Gl.h(b));return b};gy.prototype.sb=q;gy.prototype.qb=function(a,b){return K.l(b,ml,so.h(this))}; +hy.prototype.sb=q;hy.prototype.qb=function(a,b){return K.l(b,Gk,so.h(this))};jy.prototype.sb=q;jy.prototype.qb=function(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a;D.c(c,fl);D.c(c,no);D.c(c,wl);c=null!=b&&(b.m&64||q===b.G)?P(U,b):b;var d=D.c(c,fl),e=D.c(c,no),f=null!=this&&(this.m&64||q===this.G)?P(U,this):this,h=D.c(f,fl),k=D.c(f,no);f=D.c(f,wl);return K.A(c,fl,t(d)?d:h,be([no,t(e)?e:k,wl,f]))};ky.prototype.sb=q;ky.prototype.qb=function(a,b){return K.l(b,Hm,Hm.h(this))};oy.prototype.sb=q; +oy.prototype.qb=function(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,oi);t(d)&&(ap(bp),d.B?d.B():d.call(null));return c};ry.prototype.sb=q;ry.prototype.qb=function(a,b){return K.l(b,Uj,Zk.h(this))};function RA(){return ig.l(function(a,b){return new R(null,2,5,T,[a,new gy(b,null,null,null)],null)},rg(function(a){return a+.5},.5),og(new 
R(null,2,5,T,[!1,!0],null)))}function SA(a){var b=Dy(RA());return K.l(K.l(a,ml,!0),Ol,b)} +function TA(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;var b=D.c(a,Ol);Tw(b);return K.l(K.l(a,ml,!0),Ol,null)}function UA(a){a=null!=a&&(a.m&64||q===a.G)?P(U,a):a;a=D.c(a,Ol);return t(a)?Je([a]):vi}my.prototype.sb=q; +my.prototype.qb=function(a,b){var c=null!=a&&(a.m&64||q===a.G)?P(U,a):a;D.c(c,jn);var d=null!=b&&(b.m&64||q===b.G)?P(U,b):b,e=D.c(d,jn);c=D.c(d,pi);var f=D.c(d,qi),h=null!=this&&(this.m&64||q===this.G)?P(U,this):this;h=D.c(h,jn);if(G.c(e,h))return d;d=K.A(d,jn,h,be([el,!0]));if(t(h))return t(c)&&(c.B?c.B():c.call(null)),SA(d);t(f)&&(f.B?f.B():f.call(null));return TA(d)};my.prototype.Fe=q;my.prototype.de=function(a,b){return UA(b)};py.prototype.sb=q; +py.prototype.qb=function(a,b){var c=K.l(b,V,V.h(this));c=null!=c&&(c.m&64||q===c.G)?P(U,c):c;var d=D.c(c,Ol);return t(d)?SA(TA(c)):c};py.prototype.Fe=q;py.prototype.de=function(a,b){return UA(b)};function VA(a){return t(a)?(a=ig.c(parseFloat,Fo(""+v.h(a),/:/)),a=ig.l(Ye,cf(a),rg(function(){return function(a){return 60*a}}(a),1)),P(Xe,a)):null} +function WA(a,b,c){t(a)?"string"===typeof a?t(0===a.indexOf("data:application/json;base64,"))?(b=a.substring(29).replace(RegExp("\\s","g"),""),b=JSON.parse(atob(b)),b=fj(b),b=new r(null,1,[V,new r(null,1,[il,b],null)],null)):t(0===a.indexOf("data:text/plain,"))?(a=a.substring(16),b=Ju(Ot(t(b)?b:80,t(c)?c:24),a),b=new r(null,1,[V,b],null)):b=t(0===a.indexOf("npt:"))?new r(null,1,[Zk,VA(a.substring(4))],null):null:b=new r(null,1,[V,new r(null,1,[il,a],null)],null):b=null;return b} +var XA=new r(null,2,[pl,new r(null,1,[On,!1],null),il,he],null); +function YA(a,b){var c=null!=b&&(b.m&64||q===b.G)?P(U,b):b,d=D.c(c,no),e=D.l(c,wk,"small"),f=D.l(c,Ak,1),h=D.c(c,Hk),k=D.c(c,fl),l=D.c(c,rl),p=D.l(c,cm,!1),m=D.l(c,gm,"asciinema"),u=D.c(c,qm),w=D.c(c,Bm),x=D.l(c,vm,!1),C=D.l(c,Em,!1),F=function(){var a=VA(h);return t(a)?a:0}();w=WA(w,k,d);var 
I=null!=w&&(w.m&64||q===w.G)?P(U,w):w;w=D.c(I,V);I=D.c(I,Zk);var M=t(I)?I:wb(w)&&0 pre { + max-height: none; +} diff --git a/Documentation/sphinx/asciicast.py b/Documentation/sphinx/asciicast.py new file mode 100644 index 00000000000000..3bff764b7dd2de --- /dev/null +++ b/Documentation/sphinx/asciicast.py @@ -0,0 +1,48 @@ +import os +import shutil + +from docutils import nodes, utils +from docutils.nodes import Body, Element +from docutils.parsers.rst import directives + +from sphinx.util import relative_uri +from sphinx.util.nodes import set_source_info +from sphinx.util.compat import Directive + + +class asciicast(Body, Element): + pass + +class Asciicast(Directive): + """Embed asciinama "movie" from specified cast file""" + + required_argument = 1 + optional_arguments = 1 + has_content = False + + def run(self): + env = self.state.document.settings.env + node = asciicast() + + node['relpath'], node['path'] = env.relfn2path(self.arguments[0]) + env.note_dependency(node['path']) + + return [node] + +def html_asciicast(self, node): + filename = os.path.basename(node['path']) + dst = os.path.join(self.builder.outdir, self.builder.imagedir, filename) + html_path = os.path.join(relative_uri(os.path.normpath(node['relpath']), self.builder.imagedir), filename) + body = ''.format(html_path) + try: + shutil.copy(node['path'], dst) + except shutil.Error as err: + self.builder.warn("failed to copy file: {}".format(err)) + self.body.append(body) + raise nodes.SkipNode + +def setup(app): + app.add_javascript("asciinema-player.js") + app.add_stylesheet("asciinema-player.css") + app.add_node(asciicast, html=(html_asciicast, None)) + app.add_directive('asciicast', Asciicast) diff --git a/Documentation/sphinx/ditaa.py b/Documentation/sphinx/ditaa.py new file mode 100644 index 00000000000000..80fc9b37b78967 --- /dev/null +++ b/Documentation/sphinx/ditaa.py @@ -0,0 +1,220 @@ +# -*- coding: utf-8 -*- +""" + sphinx.ext.ditaa + ~~~~~~~~~~~~~~~~~~~~~~~~~ + Allow ditaa-formatted graphs 
to by included in Sphinx-generated + documents inline. + :copyright: Copyright 2017 by Yongping Guo + :license: BSD, see LICENSE for details. +""" +from __future__ import print_function + +import re, os +import codecs +import posixpath +from os import path +from math import ceil +from subprocess import Popen, PIPE +try: + from hashlib import sha1 as sha +except ImportError: + from sha import sha + +from docutils import nodes +from docutils.parsers.rst import directives + +from sphinx.errors import SphinxError +from sphinx.util.osutil import ensuredir, ENOENT, EPIPE +from sphinx.util import relative_uri +#from sphinx.util.compat import Directive +from docutils.parsers.rst import Directive + +mapname_re = re.compile(r'`_ course from the Computer Science +and Engineering Department, the Faculty of Automatic Control and +Computers, University POLITEHNICA of Bucharest. + +You can get the latest version at http://github.com/linux-kernel-labs. + +To get started build the documentation from the sources after +installing docker-compose on you host: + +.. code-block:: c + + cd tools/labs && make docker-docs + +then point your browser at **Documentation/output/labs/index.html**. + +Alternatively, you can build directly on the host (see +tools/labs/docs/Dockerfile for dependencies): + +.. code-block:: c + + cd tools/labs && make docs + +.. toctree:: + + so2/index.rst + +.. toctree:: + :caption: Lectures + + lectures/intro.rst + lectures/syscalls.rst + lectures/processes.rst + lectures/interrupts.rst + lectures/smp.rst + lectures/address-space.rst + lectures/memory-management.rst + lectures/fs.rst + lectures/debugging.rst + lectures/networking.rst + lectures/arch.rst + lectures/virt.rst + +.. 
toctree:: + :caption: Labs + + labs/infrastructure.rst + labs/introduction.rst + labs/kernel_modules.rst + labs/kernel_api.rst + labs/device_drivers.rst + labs/interrupts.rst + labs/deferred_work.rst + labs/block_device_drivers.rst + labs/filesystems_part1.rst + labs/filesystems_part2.rst + labs/networking.rst + labs/arm_kernel_development.rst + labs/memory_mapping.rst + labs/device_model.rst + labs/kernel_profiling.rst + +.. toctree:: + :caption: Useful info + + info/vm.rst + info/extra-vm.rst + info/contributing.rst + diff --git a/Documentation/teaching/info/contributing.rst b/Documentation/teaching/info/contributing.rst new file mode 100644 index 00000000000000..d18c5ae1149e9f --- /dev/null +++ b/Documentation/teaching/info/contributing.rst @@ -0,0 +1,206 @@ +================================= +Contributing to linux-kernel-labs +================================= + +``linux-kernel-labs`` is an open platform. +You can help it get better by contributing to the documentation, exercises or +the infrastructure. +All contributions are welcome, no matter if they are just fixes for typos or +new sections in the documentation. + +All information required for making a contribution can be found in the +`linux-kernel-labs Linux repo `_. +In order to change anything, you need to create a Pull Request (``PR``) +from your own fork to this repository. +The PR will be reviewed by the members of the team and will be merged once +any potential issue is fixed. 
+ +******************** +Repository structure +******************** + +The `linux-kernel-labs repo `_ is +a fork of the Linux kernel repo, with the following additions: + + * ``/tools/labs``: contains the labs and the :ref:`virtual machine (VM) infrastructure` + + * ``tools/labs/templates``: contains the skeletons sources + * ``tools/labs/qemu``: contains the qemu VM configuration + + * ``/Documentation/teaching``: contains the sources used to generate this + documentation + +************************** +Building the documentation +************************** + +To build the documentation, navigate to ``tools/labs`` and run the following +command: + +.. code-block:: bash + + make docs + +.. note:: + The command should install all the required packages. + In some cases, installing the packages or building the documentation might + fail, because of broken dependencies versions. + + Instead of struggling to fix the dependencies, the simplest way to build + the documentation is using a `Docker `_. + First, install ``docker`` and ``docker-compose`` on your host, and then run: + + .. code-block:: bash + + make docker-docs + + The first run might take some time, but subsequent builds will be faster. + +*********************** +Creating a contribution +*********************** + +Forking the repository +====================== + +1. If you haven't done it already, clone the + `linux-kernel-labs repo `_ + repository locally: + + .. code-block:: bash + + $ mkdir -p ~/src + $ git clone git@github.com:linux-kernel-labs/linux.git ~/src/linux + +2. Go to https://github.com/linux-kernel-labs/linux, make sure you are logged + in and click ``Fork`` in the top right of the page. + +3. Add the forked repo as a new remote to the local repo: + + .. code-block:: bash + + $ git remote add my_fork git@github.com:/linux.git + +Now, you can push to your fork by using ``my_fork`` instead of ``origin`` +(e.g. ``git push my_fork master``). + +Creating a pull request +======================= + +.. 
warning:: + + Pull requests must be created from their own branches, which are started from + ``master``. + +1. Go to the master branch and make sure you have no local changes: + + .. code-block:: bash + + student@eg106:~/src/linux$ git checkout master + student@eg106:~/src/linux$ git status + On branch master + Your branch is up-to-date with 'origin/master'. + nothing to commit, working directory clean + + +2. Make sure the local master branch is up-to-date with linux-kernel-labs: + + .. code-block:: bash + + student@eg106:~/src/linux$ git pull origin master + + .. note:: + + You can also push the latest master to your forked repo: + + .. code-block:: bash + + student@eg106:~/src/linux$ git push my_fork master + +3. Create a new branch for your change: + + .. code-block:: bash + + student@eg106:~/src/linux$ git checkout -b + +4. Make some changes and commit them. In this example, we are going to change + ``Documentation/teaching/index.rst``: + + .. code-block:: bash + + student@eg106:~/src/linux$ vim Documentation/teaching/index.rst + student@eg106:~/src/linux$ git add Documentation/teaching/index.rst + student@eg106:~/src/linux$ git commit -m "" + + .. warning:: + + The commit message must include a relevant description of your change + and the location of the changed component. + + Examples: + + * ``documentation: index: Fix typo in the first section`` + * ``labs: block_devices: Change printk log level`` + +5. Push the local branch to your forked repository: + + .. code-block:: bash + + student@eg106:~/src/linux$ git push my_fork + +6. Open the Pull Request + + * Go to https://github.com and open your forked repository page + * Click ``New pull request``. + * Make sure base repository (left side) is ``linux-kernel-labs/linux`` and the + base is master. + * Make sure the head repository (right side) is your forked repo and the + compare branch is your pushed branch. + * Click ``Create pull request``. 
+ +Making changes to a Pull Request +================================ + +After receiving feedback for your changes, you might need to update the Pull +Request. +Your goal is to do a new push on the same branch. For this, follow the next steps: + +1. Make sure your branch is still up to date with the ``linux-kernel-labs`` repo + ``master`` branch. + + .. code-block:: bash + + student@eg106:~/src/linux$ git fetch origin master + student@eg106:~/src/linux$ git rebase FETCH_HEAD + + .. note:: + + If you are getting conflicts, it means that someone else modified the same + files/lines as you and already merged the changes since you opened the + Pull Request. + + In this case, you will need to fix the conflicts by editing the + conflicting files manually (run ``git status`` to see these files). + After fixing the conflicts, add them using ``git add`` and then run + ``git rebase --continue``. + + +2. Apply the changes to your local files +3. Commit the changes. We want all the changes to be in the same commit, so + we will amend the changes to the initial commit. + + .. code-block:: bash + + student@eg106:~/src/linux$ git add Documentation/teaching/index.rst + student@eg106:~/src/linux$ git commit --amend + +4. Force-push the updated commit: + + .. code-block:: bash + + student@eg106:~/src/linux$ git push my_fork -f + + After this step, the Pull Request is updated. It is now up to the + linux-kernel-labs team to review the pull request and integrate your + contributions in the main project. 
+ diff --git a/Documentation/teaching/info/extra-vm.rst b/Documentation/teaching/info/extra-vm.rst new file mode 100644 index 00000000000000..bd3b997ca48ddc --- /dev/null +++ b/Documentation/teaching/info/extra-vm.rst @@ -0,0 +1,166 @@ +===================================== +Customizing the Virtual Machine Setup +===================================== + +Connect to the Virtual Machine via SSH +-------------------------------------- + +The default Yocto image for the QEMU virtual machine +(``core-image-minimal-qemu``) provides the minimal functionality to run the +kernel and kernel modules. For extra features, such as an SSH connection, +a more complete image is required, such as ``core-image-sato-dev-qemu``. + +To use the new image, update the ``YOCTO_IMAGE`` variable in +``tools/labs/qemu/Makefile``: + +.. code-block:: shell + + YOCTO_IMAGE = core-image-sato-qemu$(ARCH).ext4 + +When you start the virtual machine the first time using ``make boot`` with the +new image configuration, it will download the image and then boot the virtual +machine. The image is larger (around 400MB) than the minimal image so expect +some time for the download. + +You then enter the virtual machine via ``minicom``, determine the IP address of +the ``eth0`` interface and then you can connect to the virtual machine via SSH: + +..
code-block:: shell + + $ minicom -D serial.pts + Poky (Yocto Project Reference Distro) 2.3 qemux86 /dev/hvc0 + + qemux86 login: root + root@qemux86:~# ip a s + 1: lo: mtu 65536 qdisc noqueue qlen 1000 + link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 + inet 127.0.0.1/8 scope host lo + valid_lft forever preferred_lft forever + inet6 ::1/128 scope host + valid_lft forever preferred_lft forever + 2: eth0: mtu 1500 qdisc pfifo_fast qlen 1000 + link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff + inet 172.213.0.18/24 brd 172.213.0.255 scope global eth0 + valid_lft forever preferred_lft forever + inet6 fe80::5054:ff:fe12:3456/64 scope link + valid_lft forever preferred_lft forever + 3: sit0@NONE: mtu 1480 qdisc noop qlen 1000 + link/sit 0.0.0.0 brd 0.0.0.0 + + $ ssh -l root 172.213.0.18 + The authenticity of host '172.213.0.18 (172.213.0.18)' can't be established. + RSA key fingerprint is SHA256:JUWUcD7LdvURNcamoPePMhqEjFFtUNLAqO+TtzUiv5k. + Are you sure you want to continue connecting (yes/no)? yes + Warning: Permanently added '172.213.0.18' (RSA) to the list of known hosts. + root@qemux86:~# uname -a + Linux qemux86 4.19.0+ #3 SMP Sat Apr 4 22:45:18 EEST 2020 i686 GNU/Linux + +Connecting a Debugger to the Virtual Machine Kernel +--------------------------------------------------- + +You can use GDB to connect to the running virtual machine kernel and inspect +the state of the kernel. You run ``make gdb`` in ``tools/labs/``: + +.. code-block:: shell + + .../linux/tools/labs$ make gdb + ln -fs /home/tavi/src/linux/vmlinux vmlinux + gdb -ex "target remote localhost:1234" vmlinux + GNU gdb (Ubuntu 7.11.1-0ubuntu1~16.04) 7.11.1 + Copyright (C) 2016 Free Software Foundation, Inc. + License GPLv3+: GNU GPL version 3 or later + This is free software: you are free to change and redistribute it. + There is NO WARRANTY, to the extent permitted by law. Type "show copying" + and "show warranty" for details. + This GDB was configured as "x86_64-linux-gnu". 
+ Type "show configuration" for configuration details. + For bug reporting instructions, please see: + . + Find the GDB manual and other documentation resources online at: + . + For help, type "help". + Type "apropos word" to search for commands related to "word"... + Reading symbols from vmlinux...done. + Remote debugging using localhost:1234 + 0xc13cf2f2 in native_safe_halt () at ./arch/x86/include/asm/irqflags.h:53 + 53asm volatile("sti; hlt": : :"memory"); + (gdb) bt + #0 0xc13cf2f2 in native_safe_halt () at ./arch/x86/include/asm/irqflags.h:53 + #1 arch_safe_halt () at ./arch/x86/include/asm/irqflags.h:95 + #2 default_idle () at arch/x86/kernel/process.c:341 + #3 0xc101f136 in arch_cpu_idle () at arch/x86/kernel/process.c:332 + #4 0xc106a6dd in cpuidle_idle_call () at kernel/sched/idle.c:156 + #5 do_idle () at kernel/sched/idle.c:245 + #6 0xc106a8c5 in cpu_startup_entry (state=) + at kernel/sched/idle.c:350 + #7 0xc13cb14a in rest_init () at init/main.c:415 + #8 0xc1507a7a in start_kernel () at init/main.c:679 + #9 0xc10001da in startup_32_smp () at arch/x86/kernel/head_32.S:368 + #10 0x00000000 in ?? () + (gdb) + +Rebuild the Kernel Image +------------------------ + +The kernel image is built the first time the VM is started. To rebuild the +kernel, remove the kernel image file defined by the ``ZIMAGE`` variable in +``tools/labs/qemu/Makefile``: + +.. code-block:: shell + + ZIMAGE = $(KDIR)/arch/$(ARCH)/boot/$(b)zImage + +Typically the full path of the kernel is ``arch/x86/boot/bzImage``. + +Once removed, the kernel image is rebuilt by using: + +.. code-block:: shell + + ~/src/linux/tools/labs$ make zImage + +or by simply starting the virtual machine + +..
code-block:: shell + + ~/src/linux/tools/labs$ make boot + +Using Docker containers +----------------------- + +If your setup doesn't allow the installation of the packages required for the +laboratory setup, you can build and run a container that has all the setup +already prepared for the virtual machine environment. + +In order to run the containerized setup, you need to install the following +packages: + +* ``docker`` +* ``docker-compose`` + +In order to run the container infrastructure run the following command in the +``tools/labs/`` directory: + +.. code-block:: shell + + sergiu@local:~/src/linux/tools/labs$ make docker-kernel + ... + ubuntu@so2:~$ + +The first time you run the command above, it will take a long time, because you +will have to build the container environment and install the required +applications. + +Every time you run the ``make docker-kernel`` command, another shell will +connect to the container. This will allow you to work with multiple tabs. + +All the commands that you would use in the regular environment can be used in +the containerized environment. + +The linux repository is mounted in the ``/linux`` directory. All changes +you will make here will also be seen on your local instance. + +In order to stop the container use the following command: + +.. code-block:: shell + + make stop-docker-kernel diff --git a/Documentation/teaching/info/vm.rst b/Documentation/teaching/info/vm.rst new file mode 100644 index 00000000000000..7d3be932d90180 --- /dev/null +++ b/Documentation/teaching/info/vm.rst @@ -0,0 +1,130 @@ +.. _vm_link: + +===================== +Recommended Setup +===================== +The simplest way to achieve a functional setup is to follow the steps listed in `this repo `__. + +===================== +Virtual Machine Setup +===================== + +Practice work is designed to run on a QEMU based virtual machine. Kernel code +is developed and built on the host machine and then deployed and run on the +virtual machine. 
+ +In order to run and use the virtual machine the following packages are required +on a Debian/Ubuntu system: + +* ``flex`` +* ``bison`` +* ``build-essential`` +* ``gcc-multilib`` +* ``libncurses5-dev`` +* ``qemu-system-x86`` +* ``qemu-system-arm`` +* ``python3`` +* ``minicom`` + +The ``kvm`` package is not strictly required, but will make the virtual machine +faster by using KVM support (with the ``-enable-kvm`` option to QEMU). If ``kvm`` +is absent, the virtual machine will still run (albeit slower) using emulation. + +The virtual machine setup uses prebuilt Yocto images that it downloads and a +kernel image that it builds itself. The following images are supported: + +* ``core-image-minimal-qemu`` +* ``core-image-minimal-dev-qemu`` +* ``core-image-sato-dev-qemu`` +* ``core-image-sato-qemu`` +* ``core-image-sato-sdk-qemu`` + +By default, ``core-image-minimal-qemu`` is used. This setting can be changed by +updating the ``YOCTO_IMAGE`` variable in ``tools/labs/qemu/Makefile``. + +Starting the Virtual Machine +---------------------------- + +You start the virtual machine in the ``tools/labs/`` folder by running ``make +boot``: + +.. code-block:: shell + + .../linux/tools/labs$ make boot + +The first run of the ``make boot`` command will compile the kernel image and it +will take longer. Subsequent runs will only start the QEMU virtual machine, +with verbose output provided: + +..
code-block:: shell + + .../linux/tools/labs$ make boot + mkdir /tmp/tmp.7rWv63E9Wf + sudo mount -t ext4 -o loop core-image-minimal-qemux86.ext4 /tmp/tmp.7rWv63E9Wf + sudo make -C /home/razvan/school/so2/linux.git modules_install INSTALL_MOD_PATH=/tmp/tmp.7rWv63E9Wf + make: Entering directory '/home/razvan/school/so2/linux.git' + INSTALL crypto/crypto_engine.ko + INSTALL drivers/crypto/virtio/virtio_crypto.ko + INSTALL drivers/net/netconsole.ko + DEPMOD 4.19.0+ + make: Leaving directory '/home/razvan/school/so2/linux.git' + sudo umount /tmp/tmp.7rWv63E9Wf + rmdir /tmp/tmp.7rWv63E9Wf + sleep 1 && touch .modinst + qemu/create_net.sh tap0 + + dnsmasq: failed to create listening socket for 172.213.0.1: Address already in use + qemu/create_net.sh tap1 + + dnsmasq: failed to create listening socket for 127.0.0.1: Address already in use + /home/razvan/school/so2/linux.git/tools/labs/templates/assignments/6-e100/nttcp -v -i & + nttcp-l: nttcp, version 1.47 + nttcp-l: running in inetd mode on port 5037 - ignoring options beside -v and -p + bind: Address already in use + nttcp-l: service-socket: bind:: Address already in use, errno=98 + ARCH=x86 qemu/qemu.sh -kernel /home/razvan/school/so2/linux.git/arch/x86/boot/bzImage -device virtio-serial -chardev pty,id=virtiocon0 -device virtconsole,chardev=virtiocon0 -serial pipe:pipe1 -serial pipe:pipe2 -netdev tap,id=tap0,ifname=tap0,script=no,downscript=no -net nic,netdev=tap0,model=virtio -netdev tap,id=tap1,ifname=tap1,script=no,downscript=no -net nic,netdev=tap1,model=i82559er -drive file=core-image-minimal-qemux86.ext4,if=virtio,format=raw -drive file=disk1.img,if=virtio,format=raw -drive file=disk2.img,if=virtio,format=raw --append "root=/dev/vda loglevel=15 console=hvc0" --display none -s + qemu-system-i386: -chardev pty,id=virtiocon0: char device redirected to /dev/pts/68 (label virtiocon0) + +.. note:: To show the QEMU console use + +.. 
code-block:: shell + + .../linux/tools/labs$ QEMU_DISPLAY=gtk make boot + + This will show the VGA output and will also give + access to the standard keyboard. + +.. note:: The virtual machine setup scripts and configuration files are located + in ``tools/labs/qemu/``. + +.. _vm_interaction_link: + +Connecting to the Virtual Machine +--------------------------------- + +Once the virtual machine is started you can connect to it on the serial port. A +symbolic link named ``serial.pts`` is created to the emulated serial port +device: + +.. code-block:: shell + + .../linux/tools/labs$ ls -l serial.pts + lrwxrwxrwx 1 razvan razvan 11 Apr 1 08:03 serial.pts -> /dev/pts/68 + +On the host you use the ``minicom`` command to connect to the virtual machine +via the ``serial.pts`` link: + +.. code-block:: shell + + .../linux/tools/labs$ minicom -D serial.pts + [...] + Poky (Yocto Project Reference Distro) 2.3 qemux86 /dev/hvc0 + + qemux86 login: root + root@qemux86:~# + +.. note:: When you connect to the virtual machine, simply enter ``root`` at the + login prompt and you will get a root console, no password required. + +.. note:: You exit ``minicom`` by pressing ``Ctrl+a`` and then ``x``. You will + get a confirmation prompt and then you will exit ``minicom``. 
diff --git a/Documentation/teaching/labs/arm_kernel_development.rst b/Documentation/teaching/labs/arm_kernel_development.rst new file mode 100644 index 00000000000000..ade020c4911c6f --- /dev/null +++ b/Documentation/teaching/labs/arm_kernel_development.rst @@ -0,0 +1,387 @@ +========================= +Kernel Development on ARM +========================= + +Lab objectives +============== + +* get a feeling of what System on a Chip (SoC) means +* get familiar with the embedded world using ARM as a supported architecture +* understand what a Board Support Package (BSP) means +* compile and boot an ARM kernel with Qemu using the i.MX6UL platform as an example +* get familiar with hardware description using Device Trees + +System on a Chip +================ + +A System on a Chip (**SoC**) is an integrated circuit (**IC**) that integrates an entire system onto it. The components +that can usually be found on an SoC include a central processing unit (**CPU**), memory, input/output ports, storage devices +together with more sophisticated modules like audio digital interfaces, neural processing units (**NPU**) or graphical +processing units (**GPU**). + +SoCs can be used in various applications; the most common are: + - consumer electronics (TV sets, mobile phones, video game consoles) + - industrial computers (medical imaging, etc) + - automotive + - home appliances + +The leading architecture for SoCs is **ARM**. Worth mentioning here is that there are also x86-based SoC platforms. Another thing +we need to keep an eye on is **RISC-V**, an open standard instruction set architecture. + +A simplified view of an **ARM** platform is shown in the image below: + +..
image:: ../res/schematic.png + :align: center + +We will use NXP's `i.MX6UL `_ platform as a reference, but in general all SoCs contain the following building blocks: + + - one or more CPU cores + - a system bus + - clock and reset module + + - PLL + - OSC + - reset controller + + - interrupt controller + - timers + - memory controller + - peripheral controllers + + - `I2C `_ + - `SPI `_ + - `GPIO `_ + - `Ethernet `_ (for network) + - `uSDHC `_ (for storage) + - USB + - `UART `_ + - `I2S `_ (for sound) + - eLCDIF (for LCD Panel) + +Here is the complete block diagram for the i.MX6UL platform: + +.. image:: https://www.nxp.com/assets/images/en/block-diagrams/IMX6UL-BD.jpg + :alt: IMX6UL-BD + :width: 60 % + :align: center + +The i.MX6UL Evaluation Kit board looks like this: + +.. image:: https://www.compulab.com/wp-content/gallery/sbc-imx6ul/compulab_sbc-imx6ul_single-board-computer.jpg + :alt: imx6ul-evk + :width: 60 % + :align: center + +Other popular SoC boards: + + * `Broadcom Raspberry Pi `_ + * `Texas Instruments Beagle board `_ + * `Odroid Xu4 `_ + * `Nvidia Jetson Nano `_ + +Board Support package +===================== + +A board support package (**BSP**) is the minimal set of software packages that allows demonstrating the capabilities of a certain hardware platform. This includes: + + - toolchain + - bootloader + - Linux kernel image, device tree files and drivers + - root filesystem + +Semiconductor manufacturers usually provide a **BSP** together with an evaluation board. A BSP is typically bundled using `Yocto `_ + +Toolchain +========= +Because our development machines are mostly x86-based, we need a cross compiler that can produce executable +code for the ARM platform. + +We can build our own cross compiler from scratch using https://crosstool-ng.github.io/ or we can install one + +..
code-block:: bash + + $ sudo apt-get install gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf # for arm32 + $ sudo apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu # for arm64 + +There are several toolchain binaries depending on the configuration: + + - With "arm-eabi-gcc" you have the Linux system C library which will make calls into the kernel IOCTLs, e.g. for allocating memory pages to the process. + - With "arm-eabi-none-gcc" you are running on a platform which doesn't have an operating system at all - so the C library is different to cope with that. + +Compiling the Linux kernel on ARM +--------------------------------- + +Compile the kernel for 32bit ARM boards: + +.. code-block:: bash + + # select defconfig based on your platform + $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make imx_v6_v7_defconfig + # compile the kernel + $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make -j8 + +Compile the kernel for 64bit ARM boards: + +.. code-block:: bash + + # for 64bit ARM there is a single config for all supported boards + $ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make defconfig + # compile the kernel + $ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make -j8 + +Linux kernel image +================== + +The kernel image binary is named ``vmlinux`` and it can be found in the root of the kernel tree. Compressed image used for booting can be found under: + +- ``arch/arm/boot/Image``, for arm32 +- ``arch/arm64/boot/Image``, for arm64 + +.. code-block:: bash + + $ file vmlinux + vmlinux: ELF 32-bit LSB executable, ARM, EABI5 version 1 (SYSV), statically linked, not stripped + + $ file vmlinux + vmlinux: ELF 64-bit LSB shared object, ARM aarch64, version 1 (SYSV), statically linked, not stripped + +Rootfs +====== + +The root filesystem (``rootfs``) is the filesystem mounted at the top of the file hierarchy (``/``). It should contain at least +the critical files allowing the system to boot to a shell. + +..
code-block:: bash + + root@so2$ tree -d -L 2 + ├── bin + ├── boot + ├── dev + ├── etc + ├── home + │   └── root + ├── lib + │   └── udev + ├── mnt + ├── proc + ├── sbin + │   └── init + ├── sys + ├── usr + │   ├── bin + │   ├── include + │   ├── lib + └── var + +As for ``x86``, we will make use of Yocto rootfs images. In order to download an ``ext4`` rootfs image for ``arm32`` one needs to run: + +.. code-block:: bash + + $ cd tools/labs/ + $ ARCH=arm make core-image-minimal-qemuarm.ext4 + +Device tree +=========== + +Device tree (**DT**) is a tree structure used to describe the hardware devices in a system. Each node in the tree describes a device, hence it is called a **device node**. DT was introduced +to provide a way to discover non-discoverable hardware (e.g. a device on an I2C bus). This information was previously stored inside the source code for the Linux kernel. This meant that +each time we needed to modify a node for a device the kernel needed to be recompiled. This no longer holds true as device tree and kernel image are separate binaries now. + +Device trees are stored inside device tree sources (*.dts*) and compiled into device tree blobs (*.dtb*). + +.. code-block:: bash + + # compile dtbs + $ make dtbs + + # location for DT sources on arm32 + $ ls arch/arm/boot/dts/ + imx6ul-14x14-evk.dtb imx6ull-14x14-evk.dtb bcm2835-rpi-a-plus.dts + + # location for DT source on arm64 + $ ls arch/arm64/boot/dts/ + imx8mm-evk.dts imx8mp-evk.dts + +The following image is a representation of a simple device tree, describing board type, cpu and memory. + +.. image:: ../res/dts_node.png + :align: center + +Notice that a device tree node can be defined using ``label: name@address``: + + - ``label``, is an identifier used to reference the node from other places + - ``name``, node identifier + - ``address``, used to differentiate nodes with the same name. + +A node might contain several properties arranged in the ``name = value`` format.
The name is a string +and the value can be bytes, strings, arrays of strings. + +Here is an example: + +.. code:: c + + / { + node@0 { + empty-property; + string-property = "string value"; + string-list-property = "string value 1", "string value 2"; + int-list-property = ; + + child-node@0 { + child-empty-property; + child-string-property = "string value"; + child-node-reference = <&child-node1>; + }; + + child-node1: child-node@1 { + child-empty-property; + child-string-property = "string value"; + }; + }; + }; + +Qemu +==== + +We will use ``qemu-system-arm`` to boot 32bit ARM platforms. This can be installed from official distro repos, for example: + +.. code:: bash + + sudo apt-get install -y qemu-system-arm + +However, we strongly recommend using the latest version of ``qemu-system-arm`` built from sources: + +.. code:: bash + + $ git clone https://gitlab.com/qemu-project/qemu.git + $ ./configure --target-list=arm-softmmu --disable-docs + $ make -j8 + $ ./build/qemu-system-arm + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: arm_kernel_development + +.. warning:: + + The rules for working with the virtual machine for ``ARM`` are modified as follows + + .. code-block:: shell + + # modules build + tools/labs $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make build + # modules copy + tools/labs $ ARCH=arm make copy + # kernel build + $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make -j8 + +0. Intro +-------- + +Inspect the following locations in the Linux kernel code and identify platforms and vendors using +the ARM architecture: + + - 32-bit: ``arch/arm/boot/dts`` + - 64-bit: ``arch/arm64/boot/dts`` + + +Use ``qemu`` and look at the supported platforms: + +.. code-block:: bash + + ../qemu/build/arm-softmmu/qemu-system-arm -M ? + +.. note:: We used our own compiled version of ``Qemu`` for ``arm32``. See the `Qemu`_ section for more details. + +1. Boot +------- + +Use ``qemu`` to boot the ``i.MX6UL`` platform.
In order to boot, we first need to compile the kernel. +Review the `Compiling the Linux kernel on ARM`_ section. + +Successful compilation will result in the following binaries: + + - ``arch/arm/boot/Image``, kernel image compiled for ARM + - ``arch/arm/boot/dts/imx6ul-14x14-evk.dtb``, device tree blob for ``i.MX6UL`` board + +Review the `Rootfs`_ section and download the ``core-image-minimal-qemuarm.ext4`` rootfs. +Run ``qemu`` using the following command: + +.. code-block:: bash + + ../qemu/build/arm-softmmu/qemu-system-arm -M mcimx6ul-evk -cpu cortex-a7 -m 512M \ + -kernel arch/arm/boot/zImage -nographic -dtb arch/arm/boot/dts/imx6ul-14x14-evk.dtb \ + -append "root=/dev/mmcblk0 rw console=ttymxc0 loglevel=8 earlycon printk" -sd tools/labs/core-image-minimal-qemuarm.ext4 + +.. note:: LCDIF and ASRC devices are not well supported with ``Qemu``. Remove them from compilation. + +.. code-block:: bash + + $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make menuconfig + # set FSL_ASRC=n and DRM_MXSFB=n + $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make -j8 + +Once the kernel is booted, check the kernel version and CPU info: + +.. code-block:: bash + + $ cat /proc/cpuinfo + $ cat /proc/version + +2. CPU information +------------------ + +Inspect the CPU configuration for the ``NXP i.MX6UL`` board. Start with ``arch/arm/boot/dts/imx6ul-14x14-evk.dts``. + + - find the ``cpu@0`` device tree node and look for the ``operating-points`` property. + - read the maximum and minimum operating frequency the processor can run at + + .. code:: bash + + $ cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq + $ cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq + +3. I/O memory +------------- +Inspect the I/O space configuration for the ``NXP i.MX6UL`` board. Start with ``arch/arm/boot/dts/imx6ul-14x14-evk.dts`` and identify each device mentioned below. + +..
code:: bash + + $ cat /proc/iomem + 00900000-0091ffff : 900000.sram sram@900000 + 0209c000-0209ffff : 209c000.gpio gpio@209c000 + 021a0000-021a3fff : 21a0000.i2c i2c@21a0000 + 80000000-9fffffff : System RAM + +Identify device tree nodes corresponding to: + + - ``System RAM``, look for ``memory@80000000`` node in ``arch/arm/boot/dts/imx6ul-14x14-evk.dtsi``. What's the size of the System RAM? + - ``GPIO1``, look for ``gpio@209c000`` node in ``arch/arm/boot/dts/imx6ul.dtsi``. What's the size of the I/O space for this device? + - ``I2C1``, look for ``i2c@21a0000`` node in ``arch/arm/boot/dts/imx6ul.dtsi``. What's the size of the I/O space for this device? + +4. Hello World +-------------- + +Implement a simple kernel module that prints a message at load/unload time. Compile it and load it on the ``i.MX6UL`` emulated platform. + +.. code-block:: shell + + # modules build + tools/labs $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make build + # modules copy + tools/labs $ ARCH=arm make copy + # kernel build + $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make -j8 + +5. Simple device +---------------- + +Implement a driver for a simple platform device. Find ``TODO 1`` and notice how ``simple_driver`` is declared and registered as a platform driver. +Follow ``TODO 2`` and add the ``so2,simple-device-v1`` and ``so2,simple-device-v2`` compatible strings to the ``simple_device_ids`` array. + +Create two device tree nodes in ``arch/arm/boot/dts/imx6ul.dtsi`` under the ``soc`` node with compatible strings ``so2,simple-device-v1`` and +``so2,simple-device-v2`` respectively. Then notice the behavior when loading the ``simple_driver`` module. + +..
_imx6ul: https://www.nxp.com/products/processors-and-microcontrollers/arm-processors/i-mx-applications-processors/i-mx-6-processors/i-mx-6ultralite-processor-low-power-secure-arm-cortex-a7-core:i.MX6UL diff --git a/Documentation/teaching/labs/block_device_drivers.rst b/Documentation/teaching/labs/block_device_drivers.rst new file mode 100644 index 00000000000000..3b6efca7e3099c --- /dev/null +++ b/Documentation/teaching/labs/block_device_drivers.rst @@ -0,0 +1,1210 @@ +==================== +Block Device Drivers +==================== + +Lab objectives +============== + + * acquiring knowledge about the behavior of the I/O subsystem on Linux + * hands-on activities in structures and functions of block devices + * acquiring basic skills for utilizing the API for block devices, by solving + exercises + +Overview +======== + +Block devices are characterized by random access to data organized in fixed-size +blocks. Examples of such devices are hard drives, CD-ROM drives, RAM disks, etc. +The speed of block devices is generally much higher than the speed of character +devices, and their performance is also important. This is why the Linux kernel +handles these 2 types of devices differently (it uses a specialized API). + +Working with block devices is therefore more complicated than working with +character devices. Character devices have a single current position, while block +devices must be able to move to any position in the device to provide random +access to data. To simplify work with block devices, the Linux kernel provides +an entire subsystem called the block I/O (or block layer) subsystem. + +From the kernel perspective, the smallest logical unit of addressing is the +block. Although the physical device can be addressed at sector level, the kernel +performs all disk operations using blocks. Since the smallest unit of physical +addressing is the sector, the size of the block must be a multiple of the size +of the sector.
Additionally, the block size must be a power of 2 and can not +exceed the size of a page. The size of the block may vary depending on the file +system used, the most common values being 512 bytes, 1 kilobytes and 4 +kilobytes. + + +Register a block I/O device +=========================== + +To register a block I/O device, function :c:func:`register_blkdev` is used. +To deregister a block I/O device, function :c:func:`unregister_blkdev` is +used. + +Starting with version 4.9 of the Linux kernel, the call to +:c:func:`register_blkdev` is optional. The only operations performed by this +function are the dynamic allocation of a major (if the major argument is 0 when +calling the function) and creating an entry in :file:`/proc/devices`. In +future kernel versions it may be removed; however, most drivers still call it. + +Usually, the call to the register function is performed in the module +initialization function, and the call to the deregister function is performed in +the module exit function. A typical scenario is presented below: + + +.. code-block:: c + + #include + + #define MY_BLOCK_MAJOR 240 + #define MY_BLKDEV_NAME "mybdev" + + static int my_block_init(void) + { + int status; + + status = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME); + if (status < 0) { + printk(KERN_ERR "unable to register mybdev block device\n"); + return -EBUSY; + } + //... + } + + static void my_block_exit(void) + { + //... + unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME); + } + + +Register a disk +=============== + +Although the :c:func:`register_blkdev` function obtains a major, it does not +provide a device (disk) to the system. For creating and using block devices +(disks), a specialized interface defined in :file:`linux/genhd.h` is used. + +The useful functions defined in :file:`linux/genhd.h` are to register /allocate +a disk, add it to the system, and de-register /unmount the disk. 
+ +The :c:func:`alloc_disk` function is used to allocate a disk, and the +:c:func:`del_gendisk` function is used to deallocate it. Adding the disk to the +system is done using the :c:func:`add_disk` function. + +The :c:func:`alloc_disk` and :c:func:`add_disk` functions are typically used in +the module initialization function, and the :c:func:`del_gendisk` function in +the module exit function. + +.. code-block:: c + + #include + #include + + #define MY_BLOCK_MINORS 1 + + static struct my_block_dev { + struct gendisk *gd; + //... + } dev; + + static int create_block_device(struct my_block_dev *dev) + { + dev->gd = alloc_disk(MY_BLOCK_MINORS); + //... + add_disk(dev->gd); + } + + static int my_block_init(void) + { + //... + create_block_device(&dev); + } + + static void delete_block_device(struct my_block_dev *dev) + { + if (dev->gd) + del_gendisk(dev->gd); + //... + } + + static void my_block_exit(void) + { + delete_block_device(&dev); + //... + } + +As with character devices, it is recommended to use :c:type:`my_block_dev` +structure to store important elements describing the block device. + +Note that immediately after calling the :c:func:`add_disk` function (actually +even during the call), the disk is active and its methods can be called at any +time. As a result, this function should not be called before the driver is fully +initialized and ready to respond to requests for the registered disk. + + +It can be noticed that the basic structure in working with block devices (disks) +is the :c:type:`struct gendisk` structure. + +After a call to :c:func:`del_gendisk`, the :c:type:`struct gendisk` structure +may continue to exist (and the device operations may still be called) if there +are still users (an open operation was called on the device but the associated +release operation has not been called). One solution is to keep the number of +users of the device and call the :c:func:`del_gendisk` function only when there +are no users left of the device. 
+ +:c:type:`struct gendisk` structure +================================== + +The :c:type:`struct gendisk` structure stores information about a disk. As +stated above, such a structure is obtained from the :c:func:`alloc_disk` call +and its fields must be filled before it is sent to the :c:func:`add_disk` +function. + +The :c:type:`struct gendisk` structure has the following important fields: + + * :c:member:`major`, :c:member:`first_minor`, :c:member:`minor`, describing + the identifiers used by the disk; a disk must have at least one minor; if + the disk allows the partitioning operation, a minor must be allocated for + each possible partition + * :c:member:`disk_name`, which represents the disk name as it appears in + :file:`/proc/partitions` and in sysfs (:file:`/sys/block`) + * :c:member:`fops`, representing operations associated with the disk + * :c:member:`queue`, which represents the queue of requests + * :c:member:`capacity`, which is disk capacity in 512 byte sectors; + it is initialized using the :c:func:`set_capacity` function + * :c:member:`private_data`, which is a pointer to private data + +An example of filling a :c:type:`struct gendisk` structure is presented below: + +.. code-block:: c + + #include + #include + #include + + #define NR_SECTORS 1024 + + #define KERNEL_SECTOR_SIZE 512 + + static struct my_block_dev { + //... + spinlock_t lock; /* For mutual exclusion */ + struct request_queue *queue; /* The device request queue */ + struct gendisk *gd; /* The gendisk structure */ + //... + } dev; + + static int create_block_device(struct my_block_dev *dev) + { + ... 
+ /* Initialize the gendisk structure */ + dev->gd = alloc_disk(MY_BLOCK_MINORS); + if (!dev->gd) { + printk (KERN_NOTICE "alloc_disk failure\n"); + return -ENOMEM; + } + + dev->gd->major = MY_BLOCK_MAJOR; + dev->gd->first_minor = 0; + dev->gd->fops = &my_block_ops; + dev->gd->queue = dev->queue; + dev->gd->private_data = dev; + snprintf (dev->gd->disk_name, 32, "myblock"); + set_capacity(dev->gd, NR_SECTORS); + + add_disk(dev->gd); + + return 0; + } + + static int my_block_init(void) + { + int status; + //... + status = create_block_device(&dev); + if (status < 0) + return status; + //... + } + + static void delete_block_device(struct my_block_dev *dev) + { + if (dev->gd) { + del_gendisk(dev->gd); + } + //... + } + + static void my_block_exit(void) + { + delete_block_device(&dev); + //... + } + +As stated before, the kernel considers a disk as a vector of 512 byte sectors. +In reality, the devices may have a different size of the sector. To work with +these devices, the kernel needs to be informed about the real size of a sector, +and for all operations the necessary conversions must be made. + +To inform the kernel about the device sector size, a parameter of the request +queue must be set just after the request queue is allocated, using the +:c:func:`blk_queue_logical_block_size` function. All requests generated by the +kernel will be multiple of this sector size and will be aligned accordingly. +However, communication between the device and the driver will still be performed +in sectors of 512 bytes in size, so conversion should be done each time (an +example of such conversion is when calling the :c:func:`set_capacity` function +in the code above). + +:c:type:`struct block_device_operations` structure +================================================== + +Just as for a character device, operations in :c:type:`struct file_operations` +should be completed, so for a block device, the operations in +:c:type:`struct block_device_operations` should be completed. 
The association +of operations is done through the :c:member:`fops` field in the +:c:type:`struct gendisk` +structure. + +Some of the fields of the :c:type:`struct block_device_operations` structure +are presented below: + +.. code-block:: c + + struct block_device_operations { + int (*open) (struct block_device *, fmode_t); + int (*release) (struct gendisk *, fmode_t); + int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, + unsigned long); + int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); + int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, + unsigned long); + int (*direct_access) (struct block_device *, sector_t, + void **, unsigned long *); + int (*media_changed) (struct gendisk *); + int (*revalidate_disk) (struct gendisk *); + int (*getgeo)(struct block_device *, struct hd_geometry *); + blk_qc_t (*submit_bio) (struct bio *bio); + struct module *owner; + } + +:c:func:`open` and :c:func:`release` operations are called directly from user +space by utilities that may perform the following tasks: partitioning, file +system creation, file system verification. In a :c:func:`mount` operation, the +:c:func:`open` function is called directly from the kernel space, the file +descriptor being stored by the kernel. A driver for a block device can not +differentiate between :c:func:`open` calls performed from user space and kernel +space. + +An example of how to use these two functions is given below: + +.. code-block:: c + + #include + #include + + static struct my_block_dev { + //... + struct gendisk * gd; + //... + } dev; + + static int my_block_open(struct block_device *bdev, fmode_t mode) + { + //... + + return 0; + } + + static int my_block_release(struct gendisk *gd, fmode_t mode) + { + //... + + return 0; + } + + struct block_device_operations my_block_ops = { + .owner = THIS_MODULE, + .open = my_block_open, + .release = my_block_release + }; + + static int create_block_device(struct my_block_dev *dev) + { + //.... 
+ dev->gd->fops = &my_block_ops; + dev->gd->private_data = dev; + //... + } + +Please notice that there are no read or write operations. These operations are +performed by the :c:func:`request` function associated with the request queue +of the disk. + +Request Queues - Multi-Queue Block Layer +======================================== + +Drivers for block devices use queues to store the block I/O requests that will +be processed. A request queue is represented by the +:c:type:`struct request_queue` structure. The request queue is made up of a +doubly-linked list of requests and their associated control information. The +requests are added to the queue by higher-level kernel code (for example, file +systems). + +The block device driver associates each queue with a handling function, which +will be called for each request in the queue +(the :c:type:`struct request` structure). + +In earlier versions of the Linux kernel, each device driver had associated one or +more request queues (:c:type:`struct request_queue`), where any client could add +requests, while also being able to reorder them. +The problem with this approach is that it requires a per-queue lock, making it +inefficient in distributed systems. + +The `Multi-Queue Block Queueing Mechanism <https://www.kernel.org/doc/html/latest/block/blk-mq.html>`_ +solves this issue by splitting the device driver queue in two parts: + 1. Software staging queues + 2. Hardware dispatch queues + +Software staging queues +----------------------- + +The staging queues hold requests from the clients before sending them to the +block device driver. To prevent the waiting for a per-queue lock, a staging +queue is allocated for each CPU or node. A software queue is associated to +only one hardware queue. + +While in this queue, the requests can be merged or reordered, according to an +I/O Scheduler, in order to maximize performance. This means that only the +requests coming from the same CPU or node can be optimized. 
+ +Staging queues are usually not used by the block device drivers, but only +internally by the I/O subsystem to optimize requests before sending them to the +device drivers. + +Hardware dispatch queues +------------------------ + +The hardware queues (:c:type:`struct blk_mq_hw_ctx`) are used to send the +requests from the staging queues to the block device driver. +Once in this queue, the requests can't be merged or reordered. + +Depending on the underlying hardware, a block device driver can create multiple +hardware queues in order to improve parallelism and maximize performance. + +Tag sets +-------- + +A block device driver can accept a request before the previous one is completed. +As a consequence, the upper layers need a way to know when a request is +completed. For this, a "tag" is added to each request upon submission and sent +back using a completion notification after the request is completed. + +The tags are part of a tag set (:c:type:`struct blk_mq_tag_set`), which is +unique to a device. +The tag set structure is allocated and initialized before the request queues +and also stores some of the queues properties. + +.. code-block:: c + + struct blk_mq_tag_set { + ... + const struct blk_mq_ops *ops; + unsigned int nr_hw_queues; + unsigned int queue_depth; + unsigned int cmd_size; + int numa_node; + void *driver_data; + struct blk_mq_tags **tags; + struct list_head tag_list; + ... + }; + +Some of the fields in :c:type:`struct blk_mq_tag_set` are: + + * ``ops`` - Queue operations, most notably the request handling function. + * ``nr_hw_queues`` - The number of hardware queues allocated for the device + * ``queue_depth`` - Hardware queues size + * ``cmd_size`` - Number of extra bytes allocated at the end of the device, to + be used by the block device driver, if needed. + * ``numa_node`` - In NUMA systems, the index of the node the storage device is + connected to. + * ``driver_data`` - Data private to the driver, if needed. 
+ * ``tags`` - Pointer to an array of ``nr_hw_queues`` tag sets. + * ``tag_list`` - List of request queues using this tag set. + +Create and delete a request queue +--------------------------------- + +Request queues are created using the :c:func:`blk_mq_init_queue` function and +are deleted using :c:func:`blk_cleanup_queue`. The first function creates both +the hardware and the software queues and initializes their structures. + +Queue properties, including the number of hardware queues, their capacity and +request handling function are configured using the :c:type:`blk_mq_tag_set` +structure, as described above. + +An example of using these functions is as follows: + +.. code-block:: c + + #include + #include + #include + + static struct my_block_dev { + //... + struct blk_mq_tag_set tag_set; + struct request_queue *queue; + //... + } dev; + + static blk_status_t my_block_request(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) + //... + + static struct blk_mq_ops my_queue_ops = { + .queue_rq = my_block_request, + }; + + static int create_block_device(struct my_block_dev *dev) + { + /* Initialize tag set. */ + dev->tag_set.ops = &my_queue_ops; + dev->tag_set.nr_hw_queues = 1; + dev->tag_set.queue_depth = 128; + dev->tag_set.numa_node = NUMA_NO_NODE; + dev->tag_set.cmd_size = 0; + dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + err = blk_mq_alloc_tag_set(&dev->tag_set); + if (err) { + goto out_err; + } + + /* Allocate queue. */ + dev->queue = blk_mq_init_queue(&dev->tag_set); + if (IS_ERR(dev->queue)) { + goto out_blk_init; + } + + blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE); + + /* Assign private data to queue structure. */ + dev->queue->queuedata = dev; + //... + + out_blk_init: + blk_mq_free_tag_set(&dev->tag_set); + out_err: + return -ENOMEM; + } + + static int my_block_init(void) + { + int status; + //... + status = create_block_device(&dev); + if (status < 0) + return status; + //... 
+ } + + static void delete_block_device(struct my_block_dev *dev) + { + //... + blk_cleanup_queue(dev->queue); + blk_mq_free_tag_set(&dev->tag_set); + } + + static void my_block_exit(void) + { + delete_block_device(&dev); + //... + } + +After initializing the tag set structure, the tag lists are allocated using the +:c:func:`blk_mq_alloc_tag_set` function. +The pointer to the function which will process the requests +(:c:func:`my_block_request`) is filled in the ``my_queue_ops`` structure and +then the pointer to this structure is added to the tag set. + +The queue is created using the :c:func:`blk_mq_init_queue` function, based on +the information added in the tag set. + +As part of the request queue initialization, you can configure the +:c:member:`queuedata` field, which is equivalent to the :c:member:`private_data` +field in other structures. + +Useful functions for processing request queues +---------------------------------------------- + +The ``queue_rq`` function from :c:type:`struct blk_mq_ops` is used to handle +requests for working with the block device. +This function is the equivalent of read and write functions encountered on +character devices. The function receives the requests for the device as +arguments and can use various functions for processing them. + +The functions used to process the requests in the handler are described below: + + * :c:func:`blk_mq_start_request` - must be called before starting processing + a request; + * :c:func:`blk_mq_requeue_request` - to re-send the request to the queue; + * :c:func:`blk_mq_end_request` - to end request processing and notify the + upper layers. + +Requests for block devices +========================== + +A request for a block device is described by the :c:type:`struct request` +structure. 
+ +The fields of :c:type:`struct request` structure include: + + * :c:member:`cmd_flags`: a series of flags including direction (reading or + writing); to find out the direction, the macrodefinition + :c:macro:`rq_data_dir` is used, which returns 0 for a read request and 1 + for a write request on the device; + * :c:member:`__sector`: the first sector of the transfer request; if the + device sector has a different size, the appropriate conversion should be + done. To access this field, use the :c:macro:`blk_rq_pos` macro; + * :c:member:`__data_len`: the total number of bytes to be transferred; to + access this field the :c:macro:`blk_rq_bytes` macro is used; + * generally, data from the current :c:type:`struct bio` will be + transferred; the data size is obtained using the + :c:macro:`blk_rq_cur_bytes` macro; + * :c:member:`bio`, a dynamic list of :c:type:`struct bio` structures that + is a set of buffers associated to the request; this field is accessed by + macrodefinition :c:macro:`rq_for_each_segment` if there are multiple + buffers, or by :c:macro:`bio_data` macrodefinition in case there is only + one associated buffer; + +We will discuss more about the :c:type:`struct bio` structure and its +associated operations in the :ref:`bio_structure` section. + +Create a request +---------------- + +Read /write requests are created by code layers superior to the kernel I/O +subsystem. Typically, the subsystem that creates requests for block devices is +the file management subsystem. The I/O subsystem acts as an interface between +the file management subsystem and the block device driver. The main operations +under the responsibility of the I/O subsystem are adding requests to the queue +of the specific block device and sorting and merging requests according to +performance considerations. + +Process a request +----------------- + +The central part of a block device driver is the request handling function +(``queue_rq``). 
In previous examples, the function that fulfilled this role was +:c:func:`my_block_request`. As stated in the +`Create and delete a request queue`_ section, this function is associated to the +driver when creating the tag set structure. + +This function is called when the kernel considers that the driver should process +I/O requests. The function must start processing the requests from the queue, +but it is not mandatory to finish them, as requests may be finished by other +parts of the driver. + +The request function runs in an atomic context and must follow the rules for +atomic code (it must not call functions that can sleep, etc.). + +Calling the function that processes the requests is asynchronous relative +to the actions of any userspace process and no assumptions about the process +in which the respective function is running should be made. Also, it should not +be assumed that the buffer provided by a request is from kernel space or user +space, any operation that accesses the userspace being erroneous. + +One of the simplest request handling functions is presented below: + +.. code-block:: c + + static blk_status_t my_block_request(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) + { + struct request *rq = bd->rq; + struct my_block_dev *dev = hctx->queue->queuedata; + + blk_mq_start_request(rq); + + if (blk_rq_is_passthrough(rq)) { + printk (KERN_NOTICE "Skip non-fs request\n"); + blk_mq_end_request(rq, BLK_STS_IOERR); + goto out; + } + + /* do work */ + ... + + blk_mq_end_request(rq, BLK_STS_OK); + + out: + return BLK_STS_OK; + } + +The :c:func:`my_block_request` function performs the following operations: + + * Get a pointer to the request structure from the ``bd`` argument and start + its processing using the :c:func:`blk_mq_start_request` function. + * A block device can receive calls which do not transfer data blocks (e.g. + low level operations on the disk, instructions referring to special ways of + accessing the device). 
Most drivers do not know how to handle these + requests and return an error. + * To return an error, the :c:func:`blk_mq_end_request` function is called, + ``BLK_STS_IOERR`` being the second argument. + * The request is processed according to the needs of the associated device. + * The request ends. In this case, the :c:func:`blk_mq_end_request` function is + called in order to complete the request. + +.. _bio_structure: + +:c:type:`struct bio` structure +============================== + +Each :c:type:`struct request` structure is an I/O block request, but may come +from combining several independent requests from a higher level. The sectors to be +transferred for a request can be scattered throughout main memory but they always +correspond to a set of consecutive sectors on the device. The request is +represented as a series of segments, each corresponding to a buffer in memory. +The kernel can combine requests that refer to adjacent sectors but will not +combine write requests with read requests into a single +:c:type:`struct request` structure. + +A :c:type:`struct request` structure is implemented as a linked list of +:c:type:`struct bio` structures together with information that allows the +driver to retain its current position while processing the request. + +The :c:type:`struct bio` structure is a low-level description of a portion of +a block I/O request. + +.. code-block:: c + + struct bio { + //... + struct gendisk *bi_disk; + unsigned int bi_opf; /* bottom bits req flags, top bits REQ_OP. Use accessors. */ + //... + struct bio_vec *bi_io_vec; /* the actual vec list */ + //... + struct bvec_iter bi_iter; + //... + void *bi_private; + //... + }; + +In turn, the :c:type:`struct bio` structure contains a :c:member:`bi_io_vec` +vector of :c:type:`struct bio_vec` structures. It consists of the individual +pages in the physical memory to be transferred, the offset within the page and +the size of the buffer. 
To iterate through a :c:type:`struct bio` structure, +we need to iterate through the vector of :c:type:`struct bio_vec` and transfer +the data from every physical page. To simplify vector iteration, the +:c:type:`struct bvec_iter` structure is used. This structure maintains +information about how many buffers and sectors were consumed during the +iteration. The request type is encoded in the :c:member:`bi_opf` field; to +determine it, use the :c:func:`bio_data_dir` function. + +Create a :c:type:`struct bio` structure +--------------------------------------- + +Two functions can be used to create a :c:type:`struct bio` structure: + + * :c:func:`bio_alloc`: allocates space for a new structure; the structure + must be initialized; + * :c:func:`bio_clone`: makes a copy of an existing :c:type:`struct bio` + structure; the newly obtained structure is initialized with the values of + the cloned structure fields; the buffers are shared with the + :c:type:`struct bio` structure that has been cloned so that access to the + buffers has to be done carefully to avoid access to the same memory area + from the two clones; + +Both functions return a new :c:type:`struct bio` structure. + +Submit a :c:type:`struct bio` structure +--------------------------------------- + +Usually, a :c:type:`struct bio` structure is created by the higher levels of +the kernel (usually the file system). A structure thus created is then +transmitted to the I/O subsystem that gathers more :c:type:`struct bio` +structures into a request. + +For submitting a :c:type:`struct bio` structure to the associated I/O device +driver, the :c:func:`submit_bio` function is used. The function receives as +argument an initialized :c:type:`struct bio` structure that will be added to +a request from the request queue of an I/O device. From that queue, it can be +processed by the I/O device driver using a specialized function. + + +.. 
_bio_completion: + +Wait for the completion of a :c:type:`struct bio` structure +----------------------------------------------------------- + +Submitting a :c:type:`struct bio` structure to a driver has the effect of +adding it to a request from the request queue from where it will be further +processed. Thus, when the :c:func:`submit_bio` function returns, it is not +guaranteed that the processing of the structure has finished. If you want to +wait for the processing of the request to be finished, use the +:c:func:`submit_bio_wait` function. + +To be notified when the processing of a :c:type:`struct bio` structure ends +(when we do not use :c:func:`submit_bio_wait` function), the +:c:member:`bi_end_io` field of the structure should be used. This field +specifies the function that will be called at the end of the +:c:type:`struct bio` structure processing. You can use the +:c:member:`bi_private` field of the structure to pass information to the +function. + +Initialize a :c:type:`struct bio` structure +------------------------------------------- + +Once a :c:type:`struct bio` structure has been allocated and before being +transmitted, it must be initialized. + +Initializing the structure involves filling in its important fields. As +mentioned above, the :c:member:`bi_end_io` field is used to specify the function +called when the processing of the structure is finished. The +:c:member:`bi_private` field is used to store useful data that can be accessed +in the function pointed by :c:member:`bi_end_io`. + +The :c:member:`bi_opf` field specifies the type of operation. + +.. code-block:: c + + struct bio *bio = bio_alloc(GFP_NOIO, 1); + //... + bio->bi_disk = bdev->bd_disk; + bio->bi_iter.bi_sector = sector; + bio->bi_opf = REQ_OP_READ; + bio_add_page(bio, page, size, offset); + //... 
+ +In the code snippet above we specified the block device to which we sent the +following: :c:type:`struct bio` structure, start sector, operation +(:c:data:`REQ_OP_READ` or :c:data:`REQ_OP_WRITE`) and content. The content of a +:c:type:`struct bio` structure is a buffer described by: a physical page, +the offset in the page and the size of the buffer. A page can be assigned using +the :c:func:`alloc_page` call. + +.. note:: The :c:data:`size` argument of the :c:func:`bio_add_page` call must be + a multiple of the device sector size. + +.. _bio_content: + +How to use the content of a :c:type:`struct bio` structure +---------------------------------------------------------- + +To use the content of a :c:type:`struct bio` structure, the structure's +support pages must be mapped to the kernel address space from where they can be +accessed. For mapping/unmapping, use the :c:macro:`kmap_atomic` and +the :c:macro:`kunmap_atomic` macros. + +A typical example of use is: + +.. code-block:: c + + static void my_block_transfer(struct my_block_dev *dev, size_t start, + size_t len, char *buffer, int dir); + + + static int my_xfer_bio(struct my_block_dev *dev, struct bio *bio) + { + struct bio_vec bvec; + struct bvec_iter i; + int dir = bio_data_dir(bio); + + /* Do each segment independently. */ + bio_for_each_segment(bvec, bio, i) { + sector_t sector = i.bi_sector; + char *buffer = kmap_atomic(bvec.bv_page); + unsigned long offset = bvec.bv_offset; + size_t len = bvec.bv_len; + + /* process mapped buffer */ + my_block_transfer(dev, sector, len, buffer + offset, dir); + + kunmap_atomic(buffer); + } + + return 0; + } + +As it can be seen from the example above, iterating through a +:c:type:`struct bio` requires iterating through all of its segments. A segment +(:c:type:`struct bio_vec`) is defined by the physical page, the offset +in the page and its size. + +To simplify the processing of a :c:type:`struct bio`, use the +:c:macro:`bio_for_each_segment` macrodefinition. 
It will iterate through all +segments, and will also update global information stored in an iterator +(:c:type:`struct bvec_iter`) such as the current sector as well as other +internal information (segment vector index, number of bytes left to be +processed, etc.). + +You can store information in the mapped buffer, or extract information. + +In case request queues are used and you need to process the requests +at :c:type:`struct bio` level, use the :c:macro:`rq_for_each_segment` +macrodefinition instead of the :c:macro:`bio_for_each_segment` macrodefinition. +This macrodefinition iterates through each segment of each +:c:type:`struct bio` structure of a :c:type:`struct request` structure and +updates a :c:type:`struct req_iterator` structure. The +:c:type:`struct req_iterator` contains the current :c:type:`struct bio` +structure and the iterator that traverses its segments. + +A typical example of use is: + +.. code-block:: c + + struct bio_vec bvec; + struct req_iterator iter; + + rq_for_each_segment(bvec, req, iter) { + sector_t sector = iter.iter.bi_sector; + char *buffer = kmap_atomic(bvec.bv_page); + unsigned long offset = bvec.bv_offset; + size_t len = bvec.bv_len; + int dir = bio_data_dir(iter.bio); + + my_block_transfer(dev, sector, len, buffer + offset, dir); + + kunmap_atomic(buffer); + } + +Free a :c:type:`struct bio` structure +------------------------------------- + +Once a kernel subsystem has finished using a :c:type:`struct bio` structure, it will have to +release the reference to it. This is done by calling the :c:func:`bio_put` function. + +Set up a request queue at :c:type:`struct bio` level +---------------------------------------------------- + +We have previously seen how we can specify a function to be used to process +requests sent to the driver. The function receives as argument the requests and +carries out processing at :c:type:`struct request` level. 
+ +If, for flexibility reasons, we need to specify a function that carries +out processing at :c:type:`struct bio` structure level, we no longer +use request queues and we will need to fill the ``submit_bio`` field in the +:c:type:`struct block_device_operations` associated with the driver. + +Below is a typical example of initializing a function that carries out +processing at :c:type:`struct bio` structure level: + +.. code-block:: c + + // the declaration of the function that carries out processing + // :c:type:`struct bio` structures + static blk_qc_t my_submit_bio(struct bio *bio); + + struct block_device_operations my_block_ops = { + .owner = THIS_MODULE, + .submit_bio = my_submit_bio + ... + }; + +Further reading +=============== + +* `Linux Device Drivers 3rd Edition, Chapter 16. Block Drivers `_ +* Linux Kernel Development, Second Edition – Chapter 13. The Block I/O Layer +* `A simple block driver `_ +* `The gendisk interface `_ +* `The bio structure `_ +* `Request queues `_ +* `Documentation/block/request.txt - Struct request documentation `_ +* `Documentation/block/biodoc.txt - Notes on the Generic Block Layer `_ +* `drivers/block/brd.c - RAM backed block disk driver `_ +* `I/O Schedulers `_ + + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: block_device_drivers + +0. Intro +-------- + +Using |LXR|_ find the definitions of the following symbols in the Linux kernel: + + * :c:type:`struct bio` + * :c:type:`struct bio_vec` + * :c:macro:`bio_for_each_segment` + * :c:type:`struct gendisk` + * :c:type:`struct block_device_operations` + * :c:type:`struct request` + +1. Block device +--------------- + +Create a kernel module that allows you to register or deregister a block device. +Start from the files in the :file:`1-2-3-6-ram-disk/kernel` directory in the +lab skeleton. + +Follow the comments marked with **TODO 1** in the laboratory skeleton. 
Use the +existing macrodefinitions (:c:macro:`MY_BLOCK_MAJOR`, +:c:macro:`MY_BLKDEV_NAME`). Check the value returned by the register function, +and in case of error, return the error code. + +Compile the module, copy it to the virtual machine and insert it into the +kernel. Verify that your device was successfully created inside the +:file:`/proc/devices`. +You will see a device with major 240. + +Unload the kernel module and check that the device was unregistered. + +.. hint:: Review the `Register a block I/O device`_ section. + +Change the :c:macro:`MY_BLOCK_MAJOR` value to 7. Compile the module, copy it to +the virtual machine, and insert it into the kernel. Notice that the insertion +fails because there is already another driver/device registered in the kernel +with the major 7. + +Restore the 240 value for the :c:macro:`MY_BLOCK_MAJOR` macro. + +2. Disk registration +-------------------- + +Modify the previous module to add a disk associated with the driver. Analyze the +macrodefinitions, :c:type:`my_block_dev` structure and existing functions from +the :file:`ram-disk.c` file. + +Follow the comments marked with **TODO 2**. Use the +:c:func:`create_block_device` and the :c:func:`delete_block_device` functions. + +.. hint:: Review the `Register a disk`_ and `Process a request`_ sections. + +Fill in the :c:func:`my_block_request` function to process the request +without actually processing your request: display the "request received" message +and the following information: start sector, total size, data size from the +current :c:type:`struct bio` structure, direction. To validate a request type, +use the :c:func:`blk_rq_is_passthrough` (the function returns 0 in the case in +which we are interested, i.e. when the request is generated by the file system). + +.. hint:: To find the needed info, review the `Requests for block devices`_ + section. + +Use the :c:func:`blk_mq_end_request` function to finish processing the +request. 
+ +Insert the module into the kernel and inspect the messages printed +by the module. When a device is added, a request is sent to the device. Check +the presence of :file:`/dev/myblock` and if it doesn't exist, create the device +using the command: + +.. code-block:: shell + + mknod /dev/myblock b 240 0 + +To generate writing requests, use the command: + +.. code-block:: shell + + echo "abc"> /dev/myblock + +Notice that a write request is preceded by a read request. The request +is done to read the block from the disk and "update" its content with the +data provided by the user, without overwriting the rest. After reading and +updating, writing takes place. + +3. RAM disk +----------- + +Modify the previous module to create a RAM disk: requests to the device will +result in reads/writes in a memory area. + +The memory area :c:data:`dev->data` is already allocated in the source code of +the module using :c:func:`vmalloc` and deallocated using :c:func:`vfree`. + +.. note:: Review the `Process a request`_ section. + +Follow the comments marked with **TODO 3** to complete the +:c:func:`my_block_transfer` function to write/read the request information +in/from the memory area. The function will be called for each request within +the queue processing function: :c:func:`my_block_request`. To write/read +to/from the memory area, use :c:func:`memcpy`. To determine the write/read +information, use the fields of the :c:type:`struct request` structure. + +.. hint:: To find out the size of the request data, use the + :c:macro:`blk_rq_cur_bytes` macro. Do not use the + :c:macro:`blk_rq_bytes` macro. + +.. hint:: To find out the buffer associated to the request, use + :c:data:`bio_data`(:c:data:`rq->bio`). + +.. hint:: A description of useful macros is in the `Requests for block devices`_ + section. + +.. hint:: You can find useful information in the + `block device driver example + `_ + from `Linux Device Driver `_. 
+ +For testing, use the test file :file:`user/ram-disk-test.c`. +The test program is compiled automatically at ``make build``, copied to the +virtual machine at ``make copy`` and can be run on the QEMU virtual machine +using the command: + +.. code-block:: shell + + ./ram-disk-test + +There is no need to insert the module into the kernel, it will be inserted by +the ``ram-disk-test`` command. + +Some tests may fail because of lack of synchronization between the transmitted +data (flush). + +4. Read data from the disk +-------------------------- + +The purpose of this exercise is to read data from the +:c:macro:`PHYSICAL_DISK_NAME` disk (:file:`/dev/vdb`) directly from the kernel. + +.. attention:: Before solving the exercise, we need to make sure the disk is + added to the virtual machine. + + Check the variable ``QEMU_OPTS`` from :file:`qemu/Makefile`. + There should already be two extra disks added using ``-drive ...``. + + If there are not, generate a file that we will use as + the disk image using the command: + :command:`dd if=/dev/zero of=qemu/mydisk.img bs=1024 count=1` + and add the following option: + :command:`-drive file=qemu/mydisk.img,if=virtio,format=raw` + to :file:`qemu/Makefile` (in the :c:data:`QEMU_OPTS` variable, + after the root disk). + +Follow the comments marked with **TODO 4** in the directory :file:`4-5-relay/` +and implement :c:func:`open_disk` and :c:func:`close_disk`. +Use the :c:func:`blkdev_get_by_path` and :c:func:`blkdev_put` functions. The +device must be opened in read-write mode exclusively +(:c:macro:`FMODE_READ` | :c:macro:`FMODE_WRITE` | :c:macro:`FMODE_EXCL`), and +as holder you must use the current module (:c:macro:`THIS_MODULE`). + +Implement the :c:func:`send_test_bio` function. You will have to create a new +:c:type:`struct bio` structure and fill it, submit it and wait for it. Read the +first sector of the disk. To wait, call the :c:func:`submit_bio_wait` function. + +.. 
hint:: The first sector of the disk is the sector with the index 0. + This value must be used to initialize the field + :c:member:`bi_iter.bi_sector` of the :c:type:`struct bio`. + + For the read operation, use the :c:macro:`REQ_OP_READ` macro to + initialize the :c:member:`bi_opf` field of the :c:type:`struct bio`. + +After finishing the operation, display the first 3 bytes of data read by +:c:type:`struct bio` structure. Use the format ``"% 02x"`` for :c:func:`printk` +to display the data and the :c:macro:`kmap_atomic` and :c:macro:`kunmap_atomic` +macros respectively. + +.. hint:: As an argument for the :c:func:`kmap_atomic` function, just use the + page which is allocated above in the code, in the :c:data:`page` + variable. + +.. hint:: Review the sections :ref:`bio_content` and :ref:`bio_completion`. + +For testing, use the :file:`test-relay-disk` script, which is copied on the +virtual machine when running :command:`make copy`. If it is not copied, make +sure it is executable: + +.. code-block:: shell + + chmod +x test-relay-disk + +There is no need to load the module into the kernel, it will be loaded by +:command:`test-relay-disk`. + +Use the command below to run the script: + +.. code-block:: shell + + ./test-relay-disk + +The script writes "abc" at the beginning of the disk indicated by +:c:macro:`PHYSICAL_DISK_NAME`. After running, the module will display 61 62 63 +(the corresponding hexadecimal values of letters "a", "b" and "c"). + +5. Write data to the disk +------------------------- + +Follow the comments marked with **TODO 5** to write a message +(:c:macro:`BIO_WRITE_MESSAGE`) on the disk. + +The :c:func:`send_test_bio` function receives as argument the operation type +(read or write). Call in the :c:func:`relay_init` function the function for +reading and in the :c:func:`relay_exit` function the function for writing. We +recommend using the :c:macro:`REQ_OP_READ` and the :c:macro:`REQ_OP_WRITE` +macros. 
+ +Inside the :c:func:`send_test_bio` function, if the operation is write, fill in +the buffer associated to the :c:type:`struct bio` structure with the message +:c:macro:`BIO_WRITE_MESSAGE`. Use the :c:macro:`kmap_atomic` and the +:c:macro:`kunmap_atomic` macros to work with the buffer associated to the +:c:type:`struct bio` structure. + +.. hint:: You need to update the type of the operation associated to the + :c:type:`struct bio` structure by setting the :c:member:`bi_opf` field + accordingly. + +For testing, run the :file:`test-relay-disk` script using the command: + +.. code-block:: shell + + ./test-relay-disk + +The script will display the ``"read from /dev/sdb: 64 65 66"`` message at the +standard output. + +6. Processing requests from the request queue at :c:type:`struct bio` level +--------------------------------------------------------------------------- + +In the implementation from Exercise 3, we have only processed a +:c:type:`struct bio_vec` of the current :c:type:`struct bio` from the request. +We want to process all :c:type:`struct bio_vec` structures from all +:c:type:`struct bio` structures. +For this, we will iterate through all :c:type:`struct bio` requests and through +all :c:type:`struct bio_vec` structures (also called segments) of each +:c:type:`struct bio`. + +Add, within the ramdisk implementation (:file:`1-2-3-6-ram-disk/` directory), +support for processing the requests from the request queue at +:c:type:`struct bio` level. Follow the comments marked with **TODO 6**. + +Set the :c:macro:`USE_BIO_TRANSFER` macro to 1. + +Implement the :c:func:`my_xfer_request` function. Use the +:c:macro:`rq_for_each_segment` macro to iterate through the :c:type:`bio_vec` +structures of each :c:type:`struct bio` from the request. + +.. hint:: Review the indications and the code snippets from the + :ref:`bio_content` section. + +.. hint:: Use the :c:type:`struct bio` segment iterator to get the current + sector (:c:member:`iter.iter.bi_sector`). + +.. 
hint:: Use the request iterator to get the reference to the current + :c:type:`struct bio` (:c:member:`iter.bio`). + +.. hint:: Use the :c:macro:`bio_data_dir` macro to find the reading or writing + direction for a :c:type:`struct bio`. + +Use the :c:macro:`kmap_atomic` and the :c:macro:`kunmap_atomic` macros to map +the pages of each :c:type:`struct bio` structure and access its associated +buffers. For the actual transfer, call the :c:func:`my_block_transfer` function +implemented in the previous exercise. + +For testing, use the :file:`ram-disk-test.c` test file: + +.. code-block:: shell + + ./ram-disk-test + +There is no need to insert the module into the kernel, it will be inserted by +the :command:`ram-disk-test` executable. + +Some tests may crash because of lack of synchronization between the transmitted +data (flush). diff --git a/Documentation/teaching/labs/deferred_work.rst b/Documentation/teaching/labs/deferred_work.rst new file mode 100644 index 00000000000000..72cf9ac89b74e2 --- /dev/null +++ b/Documentation/teaching/labs/deferred_work.rst @@ -0,0 +1,946 @@ +============= +Deferred work +============= + +Lab objectives +============== + +* Understanding deferred work (i.e. code scheduled to be executed at a + later time) +* Implementation of common tasks that use deferred work +* Understanding the peculiarities of synchronization for deferred work + +Keywords: softirq, tasklet, struct tasklet_struct, bottom-half +handlers, jiffies, HZ, timer, struct timer_list, spin_lock_bh, +spin_unlock_bh, workqueue, struct work_struct, kernel thread, events/x + +Background information +====================== + +Deferred work is a class of kernel facilities that allows one to +schedule code to be executed at a later time. This scheduled code can +run either in the process context or in interrupt context depending +on the type of deferred work. 
Deferred work is used to complement the +interrupt handler functionality since interrupts have important +requirements and limitations: + +* The execution time of the interrupt handler must be as small as + possible +* In interrupt context we can not use blocking calls + +Using deferred work we can perform the minimum required work in the +interrupt handler and schedule an asynchronous action from the +interrupt handler to run at a later time and execute the rest of the +operations. + +Deferred work that runs in interrupt context is also known as +bottom-half, since its purpose is to execute the rest of the actions +from an interrupt handler (top-half). + +Timers are another type of deferred work that are used to schedule the +execution of future actions after a certain amount of time has passed. + +Kernel threads are not themselves deferred work, but can be used to +complement the deferred work mechanisms. In general, kernel threads +are used as "workers" to process events whose execution contains +blocking calls. + +There are three typical operations that are used with all types of +deferred work: + +1. **Initialization**. Each type is described by a structure whose + fields will have to be initialized. The handler to be scheduled is + also set at this time. +2. **Scheduling**. Schedules the execution of the handler as soon as + possible (or after expiry of a timeout). +3. **Masking** or **Canceling**. Disables the execution of the + handler. This action can be either synchronous (which guarantees + that the handler will not run after the completion of canceling) or + asynchronous. + +.. attention:: When doing deferred work cleanup, like freeing the + structures associated with the deferred work or + removing the module and thus the handler code from the + kernel, always use the synchronous type of canceling + the deferred work. + +The main types of deferred work are kernel threads and softirqs. 
Work +queues are implemented on top of kernel threads, while tasklets and +timers are implemented on top of softirqs. Bottom-half handlers were the first +implementation of deferred work in Linux, but in the meantime they were +replaced by softirqs. That is why some functions presented +contain *bh* in their name. + +Softirqs +======== + +Softirqs can not be used by device drivers, they are reserved for +various kernel subsystems. Because of this there is a fixed number of +softirqs defined at compile time. For the current kernel version we +have the following types defined: + +.. code-block:: c + + enum { + HI_SOFTIRQ = 0, + TIMER_SOFTIRQ, + NET_TX_SOFTIRQ, + NET_RX_SOFTIRQ, + BLOCK_SOFTIRQ, + IRQ_POLL_SOFTIRQ, + TASKLET_SOFTIRQ, + SCHED_SOFTIRQ, + HRTIMER_SOFTIRQ, + RCU_SOFTIRQ, + NR_SOFTIRQS + }; + + +Each type has a specific purpose: + +* *HI_SOFTIRQ* and *TASKLET_SOFTIRQ* - running tasklets +* *TIMER_SOFTIRQ* - running timers +* *NET_TX_SOFTIRQ* and *NET_RX_SOFTIRQ* - used by the networking subsystem +* *BLOCK_SOFTIRQ* - used by the IO subsystem +* *IRQ_POLL_SOFTIRQ* - used by the IO subsystem to increase performance when the iopoll handler is invoked +* *SCHED_SOFTIRQ* - load balancing +* *HRTIMER_SOFTIRQ* - implementation of high precision timers +* *RCU_SOFTIRQ* - implementation of RCU type mechanisms [1]_ + +.. [1] RCU is a mechanism by which destructive operations + (e.g. deleting an element from a chained list) are done in two + steps: (1) removing references to deleted data and (2) freeing + the memory of the element. The second step is done only after + we are sure nobody uses the element anymore. The advantage of + this mechanism is that reading the data can be done without + synchronization. For more information see + Documentation/RCU/rcu.txt. + + +The *HI_SOFTIRQ* type softirqs have the highest priority, followed in +order by the other softirqs defined. *RCU_SOFTIRQ* has the lowest +priority. 
+ +Softirqs run in interrupt context, which means that they can +not call blocking functions. If the softirq handler requires calls to +such functions, work queues can be scheduled to execute these blocking +calls. + +Tasklets +-------- + +A tasklet is a special form of deferred work that runs in interrupt +context, just like softirqs. The main difference between softirqs and tasklets +is that tasklets can be allocated dynamically and thus they can be used +by device drivers. A tasklet is represented by :c:type:`struct +tasklet_struct` and, like many other kernel structures, it needs to be +initialized before being used. A pre-initialized tasklet can be defined +as follows: + +.. code-block:: c + + void handler(unsigned long data); + + DECLARE_TASKLET(tasklet, handler, data); + DECLARE_TASKLET_DISABLED(tasklet, handler, data); + + +If we want to initialize the tasklet manually we can use the following +approach: + +.. code-block:: c + + void handler(unsigned long data); + + struct tasklet_struct tasklet; + + tasklet_init(&tasklet, handler, data); + +The *data* parameter will be sent to the handler when it is executed. + +Programming tasklets for running is called scheduling. Tasklets are +running from softirqs. Tasklet scheduling is done with: + +.. code-block:: c + + void tasklet_schedule(struct tasklet_struct *tasklet); + + void tasklet_hi_schedule(struct tasklet_struct *tasklet); + +When using *tasklet_schedule*, a *TASKLET_SOFTIRQ* softirq is +scheduled and all tasklets scheduled are run. For +*tasklet_hi_schedule*, a *HI_SOFTIRQ* softirq is scheduled. + +If a tasklet was scheduled multiple times and it did not run between +schedules, it will run once. Once the tasklet has run, it can be +re-scheduled, and will run again at a later time. Tasklets can be +re-scheduled from their handlers. + +Tasklets can be masked and the following functions can be used: + +.. 
code-block:: c + + void tasklet_enable(struct tasklet_struct * tasklet); + void tasklet_disable(struct tasklet_struct * tasklet); + +Remember that since tasklets are running from softirqs, blocking calls +can not be used in the handler function. + +Timers +------ + +A particular type of deferred work, very often used, are timers. They +are defined by :c:type:`struct timer_list`. They run in interrupt +context and are implemented on top of softirqs. + +To be used, a timer must first be initialized by calling :c:func:`timer_setup`: + +.. code-block:: c + + #include + + void timer_setup(struct timer_list * timer, + void (*function)(struct timer_list *), + unsigned int flags); + +The above function initializes the internal fields of the structure +and associates *function* as the timer handler. Since timers are planned +over softirqs, blocking calls can not be used in the code associated +with the treatment function. + +Scheduling a timer is done with :c:func:`mod_timer`: + +.. code-block:: c + + int mod_timer(struct timer_list *timer, unsigned long expires); + +Where *expires* is the time (in the future) to run the handler +function. The function can be used to schedule or reschedule a timer. + +The time unit is *jiffie*. The absolute value of a jiffie +is dependent on the platform and it can be found using the +:c:type:`HZ` macro that defines the number of jiffies for 1 second. To +convert between jiffies (*jiffies_value*) and seconds (*seconds_value*), +the following formulas are used: + +.. code-block:: c + + jiffies_value = seconds_value * HZ ; + seconds_value = jiffies_value / HZ ; + +The kernel maintains a counter that contains the number of jiffies +since the last boot, which can be accessed via the :c:macro:`jiffies` +global variable or macro. We can use it to calculate a time in the +future for timers: + +.. 
code-block:: c + + #include + + unsigned long current_jiffies, next_jiffies; + unsigned long seconds = 1; + + current_jiffies = jiffies; + next_jiffies = jiffies + seconds * HZ; + +To stop a timer, use :c:func:`del_timer` or :c:func:`del_timer_sync`: + +.. code-block:: c + + int del_timer(struct timer_list *timer); + int del_timer_sync(struct timer_list *timer); + +These functions can be called for both a scheduled timer and an +unplanned timer. :c:func:`del_timer_sync` is used to eliminate the +races that can occur on multiprocessor systems, since at the end of +the call it is guaranteed that the timer processing function does not +run on any processor. + +A frequent mistake in using timers is that we forget to turn off +timers. For example, before removing a module, we must stop the timers +because if a timer expires after the module is removed, the handler +function will no longer be loaded into the kernel and a kernel oops +will be generated. + +The usual sequence used to initialize and schedule a one-second +timeout is: + +.. code-block:: c + + #include + + void timer_function(struct timer_list *); + + struct timer_list timer ; + unsigned long seconds = 1; + + timer_setup(&timer, timer_function, 0); + mod_timer(&timer, jiffies + seconds * HZ); + +And to stop it: + +.. code-block:: c + + del_timer_sync(&timer); + +Locking +------- + +For synchronization between code running in process context (A) and +code running in softirq context (B) we need to use special locking +primitives. We must use spinlock operations augmented with +deactivation of bottom-half handlers on the current processor in (A), +and in (B) only basic spinlock operations. Using spinlocks makes sure +that we don't have races between multiple CPUs, while deactivating the +softirqs makes sure that we don't deadlock if the softirq is scheduled +on the same CPU where we already acquired a spinlock. 
+ +We can use the :c:func:`local_bh_disable` and +:c:func:`local_bh_enable` to disable and enable softirqs handlers (and +since they run on top of softirqs also timers and tasklets): + +.. code-block:: c + + void local_bh_disable(void); + void local_bh_enable(void); + +Nested calls are allowed, the actual reactivation of the softirqs is +done only when all local_bh_disable() calls have been complemented by +local_bh_enable() calls: + +.. code-block:: c + + /* We assume that softirqs are enabled */ + local_bh_disable(); /* Softirqs are now disabled */ + local_bh_disable(); /* Softirqs remain disabled */ + + local_bh_enable(); /* Softirqs remain disabled */ + local_bh_enable(); /* Softirqs are now enabled */ + +.. attention:: These above calls will disable the softirqs only on the + local processor and they are usually not safe to use, they must be + complemented with spinlocks. + + +Most of the time device drivers will use special versions of spinlocks +calls for synchronization like :c:func:`spin_lock_bh` and +:c:func:`spin_unlock_bh`: + +.. code-block:: c + + void spin_lock_bh(spinlock_t *lock); + void spin_unlock_bh(spinlock_t *lock); + + +Workqueues +========== + +Workqueues are used to schedule actions to run in process context. The +base unit with which they work is called work. There are two types of +work: + +* :c:type:`struct work_struct` - it schedules a task to run at + a later time +* :c:type:`struct delayed_work` - it schedules a task to run after at + least a given time interval + +A delayed work uses a timer to run after the specified time +interval. The calls with this type of work are similar to those for +:c:type:`struct work_struct`, but has **_delayed** in the functions +names. + +Before using them a work item must be initialized. There are two types +of macros that can be used, one that declares and initializes the work +item at the same time and one that only initializes the work item (and +the declaration must be done separately): + +.. 
code-block:: c + + #include + + DECLARE_WORK(name , void (*function)(struct work_struct *)); + DECLARE_DELAYED_WORK(name, void(*function)(struct work_struct *)); + + INIT_WORK(struct work_struct *work, void(*function)(struct work_struct *)); + INIT_DELAYED_WORK(struct delayed_work *work, void(*function)(struct work_struct *)); + +:c:func:`DECLARE_WORK` and :c:func:`DECLARE_DELAYED_WORK` declare and +initialize a work item, and :c:func:`INIT_WORK` and +:c:func:`INIT_DELAYED_WORK` initialize an already declared work item. + +The following sequence declares and initializes a work item: + +.. code-block:: c + + #include + + void my_work_handler(struct work_struct *work); + + DECLARE_WORK(my_work, my_work_handler); + +Or, if we want to initialize the work item separately: + +.. code-block:: c + + void my_work_handler(struct work_struct * work); + + struct work_struct my_work; + + INIT_WORK(&my_work, my_work_handler); + +Once declared and initialized, we can schedule the task using +:c:func:`schedule_work` and :c:func:`schedule_delayed_work`: + +.. code-block:: c + + schedule_work(struct work_struct *work); + + schedule_delayed_work(struct delayed_work *work, unsigned long delay); + +:c:func:`schedule_delayed_work` can be used to plan a work item for +execution with a given delay. The delay time unit is jiffies. + +Work items can not be masked but they can be canceled by calling +:c:func:`cancel_delayed_work_sync` or :c:func:`cancel_work_sync`: + +.. code-block:: c + + int cancel_work_sync(struct work_struct *work); + int cancel_delayed_work_sync(struct delayed_work *work); + +The call only stops the subsequent execution of the work item. If the +work item is already running at the time of the call, it will continue +to run and the call will wait for it to finish. In any case, when these +calls return, it is guaranteed that the task will no longer run. + +.. attention:: While there are versions of these functions that are + not synchronous (e.g. 
:c:func:`cancel_work`), do not + use them when you are performing cleanup work, otherwise + race conditions could occur. + +We can wait for a workqueue to complete running all of its work items by calling :c:func:`flush_scheduled_work`: + +.. code-block:: c + + void flush_scheduled_work(void); + +This function is blocking and, therefore, can not be used in interrupt +context. The function will wait for all work items to be completed. +For delayed work items, :c:func:`cancel_delayed_work` must be called +before :c:func:`flush_scheduled_work`. + +Finally, the following functions can be used to schedule work items on +a particular processor (:c:func:`schedule_delayed_work_on`), or on all +processors (:c:func:`schedule_on_each_cpu`): + +.. code-block:: c + + int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); + int schedule_on_each_cpu(void(*function)(struct work_struct *)); + +A usual sequence to initialize and schedule a work item is the following: + +.. code-block:: c + + void my_work_handler(struct work_struct *work); + + struct work_struct my_work; + + INIT_WORK(&my_work, my_work_handler); + + schedule_work(&my_work); + +And for waiting for termination of a work item: + +.. code-block:: c + + flush_scheduled_work(); + +As you can see, the *my_work_handler* function receives the task as +its parameter. To be able to access the module's private data, you can +use :c:func:`container_of`: + +.. code-block:: c + + struct my_device_data { + struct work_struct my_work; + // ... + }; + + void my_work_handler(struct work_struct *work) + { + struct my_device_data * my_data; + + my_data = container_of(work, struct my_device_data, my_work); + // ... + } + +Scheduling work items with the functions above will run the handler in +the context of a kernel thread called *events/x*, where x is the +processor number. The kernel will initialize a kernel thread (or a +pool of workers) for each processor present in the system: + +.. 
code-block:: shell + + $ ps -e + PID TTY TIME CMD + 1 ? 00:00:00 init + 2 ? 00:00:00 ksoftirqd/0 + 3 ? 00:00:00 events/0 <--- kernel thread that runs work items + 4 ? 00:00:00 khelper + 5 ? 00:00:00 kthread + 7 ? 00:00:00 kblockd/0 + 8 ? 00:00:00 kacpid + +The above functions use a predefined workqueue (called events), and +they run in the context of the *events/x* thread, as noted +above. Although this is sufficient in most cases, it is a shared +resource and large delays in work item handlers can cause delays for +other queue users. For this reason there are functions for creating +additional queues. + +A workqueue is represented by :c:type:`struct workqueue_struct`. A new +workqueue can be created with these functions: + +.. code-block:: c + + struct workqueue_struct *create_workqueue(const char *name); + struct workqueue_struct *create_singlethread_workqueue(const char *name); + +:c:func:`create_workqueue` uses one thread for each processor in the +system, and :c:func:`create_singlethread_workqueue` uses a single +thread. + +To add a task to the new queue, use :c:func:`queue_work` or +:c:func:`queue_delayed_work`: + +.. code-block:: c + + int queue_work(struct workqueue_struct * queue, struct work_struct *work); + + int queue_delayed_work(struct workqueue_struct *queue, + struct delayed_work * work , unsigned long delay); + +:c:func:`queue_delayed_work` can be used to plan a work item for execution +with a given delay. The time unit for the delay is jiffies. + +To wait for all work items to finish call :c:func:`flush_workqueue`: + +.. code-block:: c + + void flush_workqueue(struct workqueue_struct * queue); + +And to destroy the workqueue call :c:func:`destroy_workqueue`: + +.. code-block:: c + + void destroy_workqueue(struct workqueue_struct *queue); + +The next sequence declares and initializes an additional workqueue, +declares and initializes a work item and adds it to the queue: + +.. 
code-block:: c + + void my_work_handler(struct work_struct *work); + + struct work_struct my_work; + struct workqueue_struct * my_workqueue; + + my_workqueue = create_singlethread_workqueue("my_workqueue"); + INIT_WORK(&my_work, my_work_handler); + + queue_work(my_workqueue, &my_work); + +And the next code sample shows how to remove the workqueue: + +.. code-block:: c + + flush_workqueue(my_workqueue); + destroy_workqueue(my_workqueue); + +The work items planned with these functions will run in the context of +a new kernel thread called *my_workqueue*, the name passed to +:c:func:`create_singlethread_workqueue`. + +Kernel threads +============== + +Kernel threads have emerged from the need to run kernel code in +process context. Kernel threads are the basis of the workqueue +mechanism. Essentially, a kernel thread is a thread that only runs in +kernel mode and has no user address space or other user attributes. + +To create a kernel thread, use :c:func:`kthread_create`: + +.. code-block:: c + + #include + + struct task_struct *kthread_create(int (*threadfn)(void *data), + void *data, const char namefmt[], ...); + +* *threadfn* is a function that will be run by the kernel thread +* *data* is a parameter to be sent to the function +* *namefmt* represents the kernel thread name, as it is displayed in + ps/top ; Can contain sequences %d , %s etc. Which will be replaced + according to the standard printf syntax. + +For example, the following call: + +.. code-block:: c + + kthread_create (f, NULL, "%skthread%d", "my", 0); + +Will create a kernel thread with the name mykthread0. + +The kernel thread created with this function will be stopped (in the +*TASK_INTERRUPTIBLE* state). To start the kernel thread, call the +:c:func:`wake_up_process`: + +.. code-block:: c + + #include + + int wake_up_process(struct task_struct *p); + +Alternatively, you can use :c:func:`kthread_run` to create and run a +kernel thread: + +.. 
code-block:: c + + struct task_struct * kthread_run(int (*threadfn)(void *data), + void *data, const char namefmt[], ...); + +Even though the programming restrictions for the function running within +the kernel thread are more relaxed and scheduling is closer to +scheduling in userspace, there are some limitations to be +taken into account. We will list below the actions that can or can not +be performed from a kernel thread: + +* can't access the user address space (even with copy_from_user, + copy_to_user) because a kernel thread does not have a user address + space +* can't implement busy wait code that runs for a long time; if the + kernel is compiled without the preemptive option, that code will run + without being preempted by other kernel threads or user processes + thus hogging the system +* can call blocking operations +* can use spinlocks, but if the hold time of the lock is significant, + it is recommended to use mutexes + +The termination of a kernel thread is done voluntarily, within the +function running in the kernel thread, by calling :c:func:`do_exit`: + +.. code-block:: c + + fastcall NORET_TYPE void do_exit(long code); + +Most of the implementations of kernel thread handlers use the same +model and it is recommended to start using the same model to avoid +common mistakes: + +.. code-block:: c + + #include + + DECLARE_WAIT_QUEUE_HEAD(wq); + + // list events to be processed by kernel thread + struct list_head events_list; + spinlock_t events_lock; + + + // structure describing the event to be processed + struct event { + struct list_head lh; + bool stop; + //... 
+ }; + + struct event* get_next_event(void) + { + struct event *e; + + spin_lock(&events_lock); + e = list_first_entry_or_null(&events_list, struct event, lh); + if (e) + list_del(&e->lh); + spin_unlock(&events_lock); + + return e; + } + + int my_thread_f(void *data) + { + struct event *e; + + while (true) { + wait_event(wq, (e = get_next_event())); + + /* Event processing */ + + if (e->stop) + break; + } + + do_exit(0); + } + + /* create and start kthread */ + kthread_run(my_thread_f, NULL, "%skthread%d", "my", 0); + + +With the template above, the kernel thread requests can be issued +with: + +.. code-block:: c + + void send_event(struct event *ev) + { + spin_lock(&events_lock); + list_add(&ev->lh, &events_list); + spin_unlock(&events_lock); + wake_up(&wq); + } + +Further reading +=============== + +* `Linux Device Drivers, 3rd ed., Ch. 7: Time, Delays, and Deferred Work `_ +* `Scheduling Tasks `_ +* `Driver porting: the workqueue interface `_ +* `Workqueues get a rework `_ +* `Kernel threads made easy `_ +* `Unreliable Guide to Locking `_ + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: deferred_work + +0. Intro +-------- + +Using |LXR|_, find the definitions of the following symbols: + +* :c:macro:`jiffies` +* :c:type:`struct timer_list` +* :c:func:`spin_lock_bh` + + +1. Timer +-------- + +We're looking at creating a simple kernel module that displays a +message *TIMER_TIMEOUT* seconds after the module is loaded into the kernel. + +Generate the skeleton for the task named **1-2-timer** and follow the +sections marked with **TODO 1** to complete the task. + +.. hint:: Use `pr_info(...)`. Messages will be displayed on the + console and can also be viewed using dmesg. When scheduling + the timer we need to use the absolute time of the system (in + the future) in number of ticks. The current time of the + system in the number of ticks is given by :c:macro:`jiffies`. 
+ Thus, the absolute time we need to pass to the timer is + ``jiffies + TIMER_TIMEOUT * HZ``. + + For more information review the `Timers`_ section. + + +2. Periodic timer +----------------- + +Modify the previous module to display the message once every +TIMER_TIMEOUT seconds. Follow the section marked with **TODO 2** in the +skeleton. + +3. Timer control using ioctl +---------------------------- + +We plan to display information about the current process N +seconds after receiving an ioctl call from user space. N is transmitted as +an ioctl parameter. + +Generate the skeleton for the task named **3-4-5-deferred** and +follow the sections marked with **TODO 1** in the skeleton driver. + +You will need to implement the following ioctl operations. + +* MY_IOCTL_TIMER_SET to schedule a timer to run after a number of + seconds which is received as an argument to ioctl. The timer does + not run periodically. + * This command directly receives a value, not a pointer. + +* MY_IOCTL_TIMER_CANCEL to deactivate the timer. + +.. note:: Review :ref:`ioctl` for a way to access the ioctl argument. + +.. note:: Review the `Timers`_ section for information on enabling / + disabling a timer. In the timer handler, display the current + process identifier (PID) and the process executable image name. + +.. hint:: You can find the current process identifier using the *pid* + and *comm* fields of the current process. For details, + review :ref:`proc-info`. + +.. hint:: To use the device driver from userspace you must create the + device character file */dev/deferred* using the mknod + utility. Alternatively, you can run the + *3-4-5-deferred/kernel/makenode* script that performs this + operation. + +Enable and disable the timer by calling user-space ioctl +operations. Use the *3-4-5-deferred/user/test* program to test +planning and canceling of the timer. The program receives the ioctl +type operation and its parameters (if any) on the command line. + +.. 
hint:: Run the test executable without arguments to observe the + command line options it accepts. + + To enable the timer after 3 seconds use: + + .. code-block:: c + + ./test s 3 + + To disable the timer use: + + .. code-block:: c + + ./test c + + +Note that every time the timer runs, the current process is +*swapper/0* with PID 0. This process is the idle process. It is +running when there is nothing else to run. Because the virtual +machine is very light and does not do much it is natural to see this +process most of the time. + +4. Blocking operations +---------------------- + +Next we want to see what happens when we perform blocking operations +in a timer routine. For this we try to call in the timer-handling +routines a function called alloc_io() that simulates a blocking +operation. + +Modify the module so that when you receive the *MY_IOCTL_TIMER_ALLOC* +command the timer handler will call :c:func:`alloc_io`. Follow the +sections marked with **TODO 2** in the skeleton. + +Use the same timer. To differentiate functionality in the timer +handler, use a flag in the device structure. Use the +*TIMER_TYPE_ALLOC* and *TIMER_TYPE_SET* macros defined in the code +skeleton. For initialization, use TIMER_TYPE_NONE. + +Run the test program to verify the functionality of task 3. Run the +test program again to call :c:func:`alloc_io()`. + +.. note:: The driver causes an error because a blocking function is + called in the atomic context (the timer handler runs + in interrupt context). + +5. Workqueues +------------- + +We will modify the module to prevent the error observed in the +previous task. + +To do so, let's call :c:func:`alloc_io` using workqueues. Schedule a +work item from the timer handler. In the work handler (running in +process context) call the :c:func:`alloc_io`. Follow the sections +marked with **TODO 3** in the skeleton and review the `Workqueues`_ +section if needed. + +.. 
hint:: Add a new field with the type :c:type:`struct work_struct` + in your device structure. Initialize this field. Schedule + the work from the timer handler using :c:func:`schedule_work`. + Schedule the timer handler after N seconds from the ioctl. + +6. Kernel thread +---------------- + +Implement a simple module that creates a kernel thread that shows the +current process identifier. + +Generate the skeleton for the task named **6-kthread** and follow the +TODOs from the skeleton. + + +.. note:: There are two options for creating and running a thread: + + * :c:func:`kthread_run` to create and run the thread + + * :c:func:`kthread_create` to create a suspended thread and + then start it running with :c:func:`wake_up_process`. + + Review the `Kernel Threads`_ section if needed. + +.. attention:: Synchronize the thread termination with module unloading: + + * The thread should finish when the module is unloaded + + * Wait for the kernel thread to exit before continuing + with unloading + + +.. hint:: For synchronization use two wait queues and two flags. + + Review :ref:`waiting-queues` on how to use waiting queues. + + Use atomic variables for flags. Review :ref:`atomic-variables`. + + +7. Buffer shared between timer and process +------------------------------------------ + +The purpose of this task is to exercise the synchronization between a +deferrable action (a timer) and process context. Set up a periodic +timer that monitors a list of processes. If one of the processes +terminates, a message is printed. Processes can be dynamically added to +the list. Use the *3-4-5-deferred/kernel/* skeleton as a base and +follow the **TODO 4** markings to complete the task. + +When the *MY_IOCTL_TIMER_MON* command is received, check that the given +process exists and, if so, add it to the monitored list of +processes and then arm the timer after setting its type. + +.. 
hint:: Use :c:func:`get_proc` which checks the pid, finds the + associated :c:type:`struct task_struct` and allocates a + :c:type:`struct mon_proc` item you can add to your + list. Note that the function also increases the reference + counter of the task, so that its memory won't be freed when + the task terminates. + +.. attention:: Use a spinlock to protect the access to the list. Note + that since we share data with the timer handler we need + to disable bottom-half handlers in addition to taking + the lock. Review the `Locking`_ section. + +.. hint:: Collect the information every second from a timer. Use the + existing timer and add new behaviour for it via the + TIMER_TYPE_ACCT. To set the flag, use the *t* argument of + the test program. + + +In the timer handler, iterate over the list of monitored processes and +check if they have terminated. If so, print the process name and pid, +then remove the process from the list, decrement the task usage +counter so that its memory can be freed and finally free the +:c:type:`struct mon_proc` structure. + +.. hint:: Use the *state* field of :c:type:`struct task_struct`. A + task has terminated if its state is *TASK_DEAD*. + +.. hint:: Use :c:func:`put_task_struct` to decrement the task usage + counter. + +.. attention:: Make sure you protect the list access with a + spinlock. The simple variant will suffice. + +.. attention:: Make sure to use the safe iteration over the list since + we may need to remove an item from the list. + +Rearm the timer after checking the list. 
diff --git a/Documentation/teaching/labs/device_drivers.rst b/Documentation/teaching/labs/device_drivers.rst new file mode 100644 index 00000000000000..f73121b5396588 --- /dev/null +++ b/Documentation/teaching/labs/device_drivers.rst @@ -0,0 +1,1037 @@ +======================== +Character device drivers +======================== + +Laboratory objectives +===================== + + * understand the concepts behind character device driver + * understand the various operations that can be performed on character devices + * working with waiting queues + +Overview +======== + +In UNIX, hardware devices are accessed by the user through special device +files. These files are grouped into the /dev directory, and system calls +``open``, ``read``, ``write``, ``close``, ``lseek``, ``mmap`` etc. are +redirected by the operating system to the device driver associated with the +physical device. The device driver is a kernel component (usually a module) +that interacts with a hardware device. + +In the UNIX world there are two categories of device files and thus +device drivers: character and block. This division is done by the speed, +volume and way of organizing the data to be transferred from the device to the +system and vice versa. In the first category, there are slow devices, which +manage a small amount of data, and access to data does not require frequent +seek queries. Examples are devices such as keyboard, mouse, serial ports, +sound card, joystick. In general, operations with these devices (read, write) +are performed sequentially byte by byte. The second category includes devices +where data volume is large, data is organized on blocks, and search is common. +Examples of devices that fall into this category are hard drives, cdroms, ram +disks, magnetic tape drives. For these devices, reading and writing is done at +the data block level. + +For the two types of device drivers, the Linux kernel offers different APIs. 
+While for character devices system calls go directly to device drivers, in the case of +block devices, the drivers do not work directly with system calls. In +the case of block devices, communication between the user-space and the block +device driver is mediated by the file management subsystem and the block device +subsystem. The role of these subsystems is to prepare the device driver's +necessary resources (buffers), to keep the recently read data in the cache +buffer, and to order the read and write operations for performance reasons. + +Majors and minors +================= + +In UNIX, the devices traditionally had a unique, fixed identifier associated +with them. This tradition is preserved in Linux, although identifiers can be +dynamically allocated (for compatibility reasons, most drivers still use static +identifiers). The identifier consists of two parts: major and minor. The first +part identifies the device type (IDE disk, SCSI disk, serial port, etc.) +and the second one identifies the device (first disk, second serial port, +etc.). Most times, the major identifies the driver, while the minor identifies +each physical device served by the driver. In general, a driver will have an +associated major and will be responsible for all minors associated with that +major. + +.. code-block:: bash + + $ ls -la /dev/hda? /dev/ttyS? + brw-rw---- 1 root disk 3, 1 2004-09-18 14:51 /dev/hda1 + brw-rw---- 1 root disk 3, 2 2004-09-18 14:51 /dev/hda2 + crw-rw---- 1 root dialout 4, 64 2004-09-18 14:52 /dev/ttyS0 + crw-rw---- 1 root dialout 4, 65 2004-09-18 14:52 /dev/ttyS1 + +As can be seen from the example above, device-type information can be found +using the ls command. The special character files are identified by the ``c`` +character in the first column of the command output, and the block type by the +character ``b``. In columns ``5`` and ``6`` of the result you can see the +major, respectively the minor for each device. 
+ +Certain major identifiers are statically assigned to devices (in the +``Documentation/admin-guide/devices.txt`` file from the kernel sources). When choosing the +identifier for a new device, you can use two methods: static (choose a number +that does not seem to be used already) or dynamically. In /proc/devices are the +loaded devices, along with the major identifier. + +To create a device type file, use the ``mknod`` command; the command receives the +type (``block`` or ``character``), ``major`` and ``minor`` of the device +(``mknod name type major minor``). Thus, if you want to create a character device +named ``mycdev`` with the major ``42`` and minor ``0``, use the command: + +.. code-block:: bash + + # mknod /dev/mycdev c 42 0 + +To create the block device with the name ``mybdev`` with the major 240 and minor 0 +the command will be: + +.. code-block:: bash + + # mknod /dev/mybdev b 240 0 + +Next, we'll refer to character devices as drivers. + +Data structures for a character device +====================================== + +In the kernel, a character-type device is represented by +:c:type:`struct cdev `, a structure used to register it in the +system. Most driver operations use three important structures: +``struct file_operations``, ``struct file`` and ``struct inode``. + +:c:type:`struct file_operations` +-------------------------------- + +As mentioned above, the character device drivers receive unaltered system calls +made by users over device-type files. Consequently, implementation of a character +device driver means implementing the system calls specific to files: ``open``, +``close``, ``read``, ``write``, ``lseek``, ``mmap``, etc. These operations are +described in the fields of the ``struct file_operations`` structure: + +.. 
code-block:: c + + #include + + struct file_operations { + struct module *owner; + loff_t (*llseek) (struct file *, loff_t, int); + ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); + ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); + [...] + long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); + [...] + int (*open) (struct inode *, struct file *); + int (*flush) (struct file *, fl_owner_t id); + int (*release) (struct inode *, struct file *); + [...] + +.. ** + +It can be noticed that the signature of the function differs from the system +call that the user uses. The operating system sits between the user and +the device driver to simplify implementation in the device driver. + +``open`` does not receive the parameter path or the various parameters that control +the file opening mode. Similarly, ``read``, ``write``, ``release``, ``ioctl``, ``lseek`` +do not receive as a parameter a file descriptor. Instead, these routines receive as +parameters two structures: ``file`` and ``inode``. Both structures represent a file, +but from different perspectives. + +Most parameters for the presented operations have a direct meaning: + * ``file`` and ``inode`` identifies the device type file; + * ``size`` is the number of bytes to be read or written; + * ``offset`` is the displacement to be read or written (to be updated + accordingly); + * ``user_buffer`` user buffer from which it reads / writes; + * ``whence`` is the way to seek (the position where the search operation starts); + * ``cmd`` and ``arg`` are the parameters sent by the users to the ioctl call (IO + control). + +``inode`` and ``file`` structures +--------------------------------- + +An ``inode`` represents a file from the point of view of the file system. Attributes +of an inode are the size, rights, times associated with the file. An inode uniquely +identifies a file in a file system. 
+ +The ``file`` structure is still a file, but closer to the user's point of view. +From the attributes of the file structure we list: the inode, the file name, +the file opening attributes, the file position. All open files at a given time +have associated a ``file`` structure. + +To understand the differences between inode and file, we will use an analogy +from object-oriented programming: if we consider a class inode, then the files +are objects, that is, instances of the inode class. Inode represents the static +image of the file (the inode has no state), while the file represents the +dynamic image of the file (the file has state). + +Returning to device drivers, the two entities have almost always standard ways +of using: the inode is used to determine the major and minor of the device on +which the operation is performed, and the file is used to determine the flags +with which the file was opened, but also to save and access (later) private +data. + +The file structure contains, among many fields: + + * ``f_mode``, which specifies read (``FMODE_READ``) or write + (``FMODE_WRITE``); + * ``f_flags``, which specifies the file opening flags (``O_RDONLY``, + ``O_NONBLOCK``, ``O_SYNC``, ``O_APPEND``, ``O_TRUNC``, etc.); + * ``f_op``, which specifies the operations associated with the file (pointer to + the ``file_operations`` structure ); + * ``private_data``, a pointer that can be used by the programmer to store + device-specific data; The pointer will be initialized to a memory location + assigned by the programmer. + * ``f_pos``, the offset within the file + +The inode structure contains, among much information, an ``i_cdev`` +field, which is a pointer to the structure that defines the character +device (when the inode corresponds to a character device). 
+ +Implementation of operations +============================ + +To implement a device driver, it is recommended that you create a structure +that contains information about the device, information used in the module. In +the case of a driver for a character device, the structure will contain a cdev +structure field to refer to the device. The following example uses the struct +my_device_data: + +.. code-block:: c + + #include + #include + + struct my_device_data { + struct cdev cdev; + /* my data starts here */ + //... + }; + + static int my_open(struct inode *inode, struct file *file) + { + struct my_device_data *my_data; + + my_data = container_of(inode->i_cdev, struct my_device_data, cdev); + + file->private_data = my_data; + //... + } + + static int my_read(struct file *file, char __user *user_buffer, size_t size, loff_t *offset) + { + struct my_device_data *my_data; + + my_data = (struct my_device_data *) file->private_data; + + //... + } + +.. ** + +A structure like ``my_device_data`` will contain the data associated with a device. +The ``cdev`` field (``cdev`` type) is a character-type device and is used to record it +in the system and identify the device. The pointer to the ``cdev`` member can be +found using the ``i_cdev`` field of the ``inode`` structure (using the ``container_of`` +macro). In the private_data field of the file structure, information can be +stored at open which is then available in the ``read``, ``write``, ``release``, etc. +routines. + +Registration and unregistration of character devices +==================================================== + +The registration/unregistration of a device is made by specifying the major and +minor. The ``dev_t`` type is used to keep the identifiers of a device (both major +and minor) and can be obtained using the ``MKDEV`` macro. + +For the static assignment and unallocation of device identifiers, the +``register_chrdev_region`` and ``unregister_chrdev_region`` functions are used: + +.. 
code-block:: c + + #include + + int register_chrdev_region(dev_t first, unsigned int count, char *name); + void unregister_chrdev_region(dev_t first, unsigned int count); + +.. ** + +It is recommended that device identifiers be dynamically assigned to the +``alloc_chrdev_region`` function. + +Below sequence reserves ``my_minor_count`` devices, starting with ``my_major`` +major and ``my_first_minor`` minor (if the max value for minor is exceeded, +move to the next major): + +.. code-block:: c + + #include + ... + + err = register_chrdev_region(MKDEV(my_major, my_first_minor), my_minor_count, + "my_device_driver"); + if (err != 0) { + /* report error */ + return err; + } + ... + +.. ** + +After assigning the identifiers, the character device will have to be +initialized (``cdev_init``) and the kernel will have to be notified(``cdev_add``). The +``cdev_add`` function must be called only after the device is ready to receive +calls. Removing a device is done using the ``cdev_del`` function. + +.. code-block:: c + + #include + + void cdev_init(struct cdev *cdev, struct file_operations *fops); + int cdev_add(struct cdev *dev, dev_t num, unsigned int count); + void cdev_del(struct cdev *dev); + +.. ** + +The following sequence registers and initializes MY_MAX_MINORS devices: + +.. code-block:: c + + #include + #include + + #define MY_MAJOR 42 + #define MY_MAX_MINORS 5 + + struct my_device_data { + struct cdev cdev; + /* my data starts here */ + //... 
+ }; + + struct my_device_data devs[MY_MAX_MINORS]; + + const struct file_operations my_fops = { + .owner = THIS_MODULE, + .open = my_open, + .read = my_read, + .write = my_write, + .release = my_release, + .unlocked_ioctl = my_ioctl + }; + + int init_module(void) + { + int i, err; + + err = register_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS, + "my_device_driver"); + if (err != 0) { + /* report error */ + return err; + } + + for(i = 0; i < MY_MAX_MINORS; i++) { + /* initialize devs[i] fields */ + cdev_init(&devs[i].cdev, &my_fops); + cdev_add(&devs[i].cdev, MKDEV(MY_MAJOR, i), 1); + } + + return 0; + } + +.. ** + +While the following sequence deletes and unregisters them: + +.. code-block:: c + + void cleanup_module(void) + { + int i; + + for(i = 0; i < MY_MAX_MINORS; i++) { + /* release devs[i] fields */ + cdev_del(&devs[i].cdev); + } + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS); + } + +.. ** + +.. note:: Initialization of the struct my_fops used the initialization + of members by name, defined in C99 standard (see designated + initializers and the file_operations structure). Structure + members who do not explicitly appear in this initialization + will be set to the default value for their type. For + example, after the initialization above, ``my_fops.mmap`` will + be NULL. + +.. _access_to_process_address_space: + +Access to the address space of the process +========================================== + +A driver for a device is the interface between an application and hardware. As +a result, we often have to access user-space data. Accessing it can not be done +directly (by dereferencing a user-space pointer). Direct access of a +user-space pointer can lead to incorrect behavior (depending on architecture, a +user-space pointer may not be valid or mapped to kernel-space), a kernel oops +(the user-mode pointer can refer to a non-resident memory area) or security +issues. 
Proper access to user-space data is done by calling the macros / +functions below: + +.. code-block:: c + + #include + + put_user(type val, type *address); + get_user(type val, type *address); + unsigned long copy_to_user(void __user *to, const void *from, unsigned long n); + unsigned long copy_from_user(void *to, const void __user *from, unsigned long n); + +.. ** + +All macros / functions return 0 in case of success and another value in case of +error and have the following roles: + + * ``put_user`` store the value ``val`` to user-space address ``address``; + Type can be one on 8, 16, 32, 64 bit (the maximum supported type depends on the + hardware platform); + * ``get_user`` analogue to the previous function, only that val will be set to a + value identical to the value at the user-space address given by address; + * ``copy_to_user`` copies ``n`` bytes from the kernel-space, from the address + referenced by ``from`` in user-space to the address referenced by ``to``; + * ``copy_from_user`` copies ``n`` bytes from user-space from the address + referenced by ``from`` in kernel-space to the address referenced by ``to``. + +A common section of code that works with these functions is: + +.. code-block:: c + + #include + + /* + * Copy at most size bytes to user space. + * Return ''0'' on success and some other value on error. + */ + if (copy_to_user(user_buffer, kernel_buffer, size)) + return -EFAULT; + else + return 0; + +Open and release +================ + +The ``open`` function performs the initialization of a device. In most cases, +these operations refer to initializing the device and filling in specific data +(if it is the first open call). The release function is about releasing +device-specific resources: unlocking specific data and closing the device if +the last call is close. + +In most cases, the open function will have the following structure: + +.. 
code-block:: c + + static int my_open(struct inode *inode, struct file *file) + { + struct my_device_data *my_data = + container_of(inode->i_cdev, struct my_device_data, cdev); + + /* validate access to device */ + file->private_data = my_data; + + /* initialize device */ + ... + + return 0; + } + +.. ** + +A problem that occurs when implementing the ``open`` function is access control. +Sometimes a device needs to be opened once at a time; More specifically, do not +allow the second open before the release. To implement this restriction, you +choose a way to handle an open call for an already open device: it can return +an error (``-EBUSY``), block open calls until a release operation, or shut down +the device before do the open. + +At the user-space call of the open and close functions on the device, call +my_open and my_release in the driver. An example of a user-space call: + +.. code-block:: c + + int fd = open("/dev/my_device", O_RDONLY); + if (fd < 0) { + /* handle error */ + } + + /* do work */ + //.. + + close(fd); + +.. ** + +Read and write +============== + +The read and write operations are reaching the device driver as a +result of an user-space program calling the read or write system calls: + +.. code-block:: c + + if (read(fd, buffer, size) < 0) { + /* handle error */ + } + + if (write(fd, buffer, size) < 0) { + /* handle error */ + } + +.. ** + +The ``read`` and ``write`` functions transfer data between the device and the +user-space: the read function reads the data from the device and transfers it +to the user-space, while writing reads the user-space data and writes it to the +device. The buffer received as a parameter is a user-space pointer, which is +why it is necessary to use the ``copy_to_user`` or ``copy_from_user`` functions. 
+ +The value returned by read or write can be: + + * the number of bytes transferred; if the returned value is less than the size + parameter (the number of bytes requested), then it means that a partial + transfer was made. Most of the time, the user-space app calls the system call + (read or write) function until the required data number is transferred. + * 0 to mark the end of the file in the case of read ; if write returns the + value 0 then it means that no byte has been written and that no error has + occurred; In this case, the user-space application retries the write call. + * a negative value indicating an error code. + +To perform a data transfer consisting of several partial transfers, the +following operations should be performed: + + * transfer the maximum number of possible bytes between the buffer received + as a parameter and the device (writing to the device/reading from the device + will be done from the offset received as a parameter); + * update the offset received as a parameter to the position from which the + next read / write data will begin; + * return the number of bytes transferred. + +The sequence below shows an example for the read function that takes +into account the internal buffer size, user buffer size and the offset: + +.. code-block:: c + + static int my_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) + { + struct my_device_data *my_data = (struct my_device_data *) file->private_data; + ssize_t len = min(my_data->size - *offset, size); + + if (len <= 0) + return 0; + + /* read data from my_data->buffer to user buffer */ + if (copy_to_user(user_buffer, my_data->buffer + *offset, len)) + return -EFAULT; + + *offset += len; + return len; + } + +.. ** + +The images below illustrate the read operation and how data is +transferred between the user-space and the driver: + + 1. 
when the driver has enough data available (starting with the OFFSET + position) to accurately transfer the required size (SIZE) to the user. + 2. when a smaller amount is transferred than required. + +.. image:: ../res/read.png + :width: 49 % +.. image:: ../res/read2.png + :width: 49 % + +We can look at the read operation implemented by the driver as a response to a +user-space read request. In this case, the driver is responsible for advancing +the offset according to how much it reads and returning the read size (which +may be less than what is required). + +The structure of the write function is similar: + +.. code-block:: c + + static int my_write(struct file *file, const char __user *user_buffer, + size_t size, loff_t * offset) + { + struct my_device_data *my_data = (struct my_device_data *) file->private_data; + ssize_t len = min(my_data->size - *offset, size); + + if (len <= 0) + return 0; + + /* read data from user buffer to my_data->buffer */ + if (copy_from_user(my_data->buffer + *offset, user_buffer, len)) + return -EFAULT; + + *offset += len; + return len; + } + +.. ** + +The write operation will respond to a write request from user-space. In +this case, depending on the maximum driver capacity (MAXSIZ), it can +write more or less than the required size. + +.. image:: ../res/write.png + :width: 49 % +.. image:: ../res/write2.png + :width: 49 % + +.. _ioctl: + +ioctl +===== + +In addition to read and write operations, a driver needs the ability to perform +certain physical device control tasks. These operations are accomplished by +implementing a ``ioctl`` function. Initially, the ioctl system call used Big Kernel +Lock. That's why the call was gradually replaced with its unlocked version +called ``unlocked_ioctl``. You can read more on LWN: +http://lwn.net/Articles/119652/ + +.. code-block:: c + + static long my_ioctl (struct file *file, unsigned int cmd, unsigned long arg); + +.. ** + +``cmd`` is the command sent from user-space. 
If a value is being sent from the +user-space call, it can be accessed directly. If a buffer is fetched, the arg +value will be a pointer to it, and must be accessed through the ``copy_to_user`` +or ``copy_from_user``. + +Before implementing the ``ioctl`` function, the numbers corresponding to the +commands must be chosen. One method is to choose consecutive numbers starting +at 0, but it is recommended to use ``_IOC(dir, type, nr, size)`` macro definition +to generate ioctl codes. The macro definition parameters are as follows: + + * ``dir`` represents the data transfer (``_IOC_NONE`` , ``_IOC_READ``, + ``_IOC_WRITE``). + * ``type`` represents the magic number (``Documentation/ioctl/ioctl-number.txt``); + * ``nr`` is the ioctl code for the device; + * ``size`` is the size of the transferred data. + +The following example shows an implementation for a ``ioctl`` function: + +.. code-block:: c + + #include + + #define MY_IOCTL_IN _IOC(_IOC_WRITE, 'k', 1, sizeof(my_ioctl_data)) + + static long my_ioctl (struct file *file, unsigned int cmd, unsigned long arg) + { + struct my_device_data *my_data = + (struct my_device_data*) file->private_data; + my_ioctl_data mid; + + switch(cmd) { + case MY_IOCTL_IN: + if( copy_from_user(&mid, (my_ioctl_data *) arg, + sizeof(my_ioctl_data)) ) + return -EFAULT; + + /* process data and execute command */ + + break; + default: + return -ENOTTY; + } + + return 0; + } + +.. ** + +At the user-space call for the ioctl function, the my_ioctl function of the +driver will be called. An example of such a user-space call: + +.. code-block:: c + + if (ioctl(fd, MY_IOCTL_IN, buffer) < 0) { + /* handle error */ + } + +.. ** + +Waiting queues +============== + +It is often necessary for a thread to wait for an operation to finish, +but it is desirable that this wait is not busy-waiting. Using waiting +queues we can block a thread until an event occurs. 
When the condition +is satisfied, elsewhere in the kernel, in another process, in an +interrupt or deferrable work, we will wake up the process. + +A waiting queue is a list of processes that are waiting for a specific +event. A queue is defined with the ``wait_queue_head_t`` type and can +be used by the functions/macros: + +.. code-block:: c + + #include + + DECLARE_WAIT_QUEUE_HEAD(wq_name); + + void init_waitqueue_head(wait_queue_head_t *q); + + int wait_event(wait_queue_head_t q, int condition); + + int wait_event_interruptible(wait_queue_head_t q, int condition); + + int wait_event_timeout(wait_queue_head_t q, int condition, int timeout); + + int wait_event_interruptible_timeout(wait_queue_head_t q, int condition, int timeout); + + void wake_up(wait_queue_head_t *q); + + void wake_up_interruptible(wait_queue_head_t *q); + +.. ** + +The roles of the macros / functions above are: + + * :c:func:`init_waitqueue_head` initializes the queue; to initialize the + queue at compile time, you can use the :c:macro:`DECLARE_WAIT_QUEUE_HEAD` macro; + * :c:func:`wait_event` and :c:func:`wait_event_interruptible` adds the current thread to the + queue while the condition is false, sets it to TASK_UNINTERRUPTIBLE or + TASK_INTERRUPTIBLE and calls the scheduler to schedule a new thread; Waiting + will be interrupted when another thread will call the wake_up function; + * :c:func:`wait_event_timeout` and :c:func:`wait_event_interruptible_timeout` have the same + effect as the above functions, only waiting can be interrupted at the end of + the timeout received as a parameter; + * :c:func:`wake_up` puts all threads off from state TASK_INTERRUPTIBLE and + TASK_UNINTERRUPTIBLE in TASK_RUNNING status; Remove these threads from the + queue; + * :c:func:`wake_up_interruptible` same action, but only threads with TASK_INTERRUPTIBLE + status are woken up. + +A simple example is that of a thread waiting to change the value of a flag. The +initializations are done by the sequence: + +.. 
code-block:: c + + #include + + wait_queue_head_t wq; + int flag = 0; + + init_waitqueue_head(&wq); + +.. ** + +A thread will wait for the flag to be changed to a value other than zero: + +.. code-block:: c + + wait_event_interruptible(wq, flag != 0); + +.. ** + +While another thread will change the flag value and wake up the waiting threads: + +.. code-block:: c + + flag = 1 ; + wake_up_interruptible (&wq); + +.. ** + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: device_drivers + +0. Intro +-------- + +Using `LXR `_ find the definitions +of the following symbols in the Linux kernel: + + * :c:type:`struct file` + * :c:type:`struct file_operations` + * :c:type:`generic_ro_fops` + * :c:func:`vfs_read` + + +1. Register/unregister +---------------------- + +The driver will control a single device with the ``MY_MAJOR`` major and +``MY_MINOR`` minor (the macros defined in the kernel/so2_cdev.c file). + + 1. Create **/dev/so2_cdev** character device node using **mknod**. + + .. hint:: Read `Majors and minors`_ section in the lab. + + 2. Implement the registration and deregistration of the device with the name + ``so2_cdev``, respectively in the init and exit module functions. Implement **TODO 1**. + + .. hint:: Read the section `Registration and unregistration of character devices`_ + + 3. Display, using ``pr_info``, a message after the registration and unregistration + operations to confirm that they were successful. Then load the module into the kernel: + + .. code-block:: bash + + $ insmod so2_cdev.ko + + And see character devices in ``/proc/devices``: + + .. code-block:: bash + + $ cat /proc/devices | less + + Identify the device type registered with major 42 . Note that ``/proc/devices`` + contains only the device types (major) but not the actual devices (i.e. minors). + + .. note:: Entries in /dev are not created by loading the module. 
These can be created + in two ways: + + * manually, using the ``mknod`` command as we did above. + * automatically, using the udev daemon. + + 4. Unload the kernel module + + .. code-block:: bash + + rmmod so2_cdev + +2. Register an already registered major +--------------------------------------- + +Modify **MY_MAJOR** so that it points to an already used major number. + +.. hint:: See ``/proc/devices`` to get an already assigned major. + +See `errno-base.h `_ +and figure out what the error code means. +Return to the initial configuration of the module. + +3. Open and close +----------------- + +Run ``cat /dev/so2_cdev`` to read data from our char device. +Reading does not work because the driver does not have the open function implemented. +Follow comments marked with TODO 2 and implement them. + + 1. Initialize your device + + * add a cdev struct field to the ``so2_device_data`` structure. + * Read the section `Registration and unregistration of character devices`_ in the lab. + + 2. Implement the open and release functions in the driver. + 3. Display a message in the open and release functions. + 4. Read the ``/dev/so2_cdev`` file again. Follow the messages displayed by the kernel. + We still get an error because the ``read`` function is not yet implemented. + +.. note:: The prototype of a device driver's operations is in the ``file_operations`` + structure. Read the `Open and release`_ section. + +4. Access restriction +--------------------- + +Restrict access to the device with atomic variables, so that only a single process +can open the device at a time. The rest will receive the "device busy" error +(``-EBUSY``). Restricting access will be done in the open function implemented by +the driver. Follow comments marked with **TODO 3** and implement them. + + 1. Add an ``atomic_t`` variable to the device structure. + 2. Initialize the variable at module initialization. + 3. Use the variable in the open function to restrict access to the device.
We + recommend using :c:func:`atomic_cmpxchg`. + 4. Reset the variable in the release function to retrieve access to the device. + 5. To test your deployment, you'll need to simulate a long-term use of your + device. To simulate a sleep, call the scheduler at the end of the device opening: + +.. code-block:: bash + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1000); + +.. ** + + 6. Test using ``cat /dev/so2_cdev`` & ``cat /dev/so2_cdev``. + + +.. note:: The advantage of the atomic_cmpxchg function is that it can check the + old value of the variable and set it up to a new value, all in one + atomic operation. Read more details about `atomic_cmpxchg `_ + An example of use is `here `_. + +5. Read operation +----------------- + +Implement the read function in the driver. Follow comments marked with ``TODO 4`` and implement them. + + 1. Keep a buffer in ``so2_device_data`` structure initialized with the value of ``MESSAGE`` macro. + Initializing this buffer will be done in module ``init`` function. + 2. At a read call, copy the contents of the kernel space buffer into the user + space buffer. + + * Use the :c:func:`copy_to_user` function to copy information from kernel space to + user space. + * Ignore the size and offset parameters at this time. You can assume that + the buffer in user space is large enough. You do not need to check the + validity of the size argument of the read function. + * The value returned by the read call is the number of bytes transmitted + from the kernel space buffer to the user space buffer. + + 3. After implementation, test using ``cat /dev/so2_cdev``. + +.. note:: The command ``cat /dev/so2_cdev`` does not end (use Ctrl+C). + Read the `read and write`_ sections and `Access to the address space of the process`_ + If you want to display the offset value use a construction of the form: + ``pr_info("Offset: %lld \n", *offset)``; The data type loff_t (used by offset ) is a typedef for long long int. 
+ +The ``cat`` command reads to the end of the file, and the end of the file is +signaled by returning the value 0 in the read. Thus, for a correct implementation, +you will need to update and use the offset received as a parameter in the read +function and return the value 0 when the user has reached the end of the buffer. + +Modify the driver so that the ``cat`` command ends: + + 1. Use the size parameter. + 2. For every read, update the offset parameter accordingly. + 3. Ensure that the read function returns the number of bytes that were copied + into the user buffer. + +.. note:: By dereferencing the offset parameter it is possible to read and move the current + position in the file. Its value needs to be updated every time a read is done + successfully. + +6. Write operation +------------------ + +Add the ability to write a message into the kernel buffer to replace the predefined message. Implement +the write function in the driver. Follow comments marked with ``TODO 5``. + +Ignore the offset parameter at this time. You can assume that the driver buffer is +large enough. You do not need to check the validity of the write function size +argument. + +.. note:: The prototype of a device driver's operations is in the file_operations + structure. + Test using commands: + + .. code-block:: bash + + echo "arpeggio"> /dev/so2_cdev + cat /dev/so2_cdev + + Read the `read and write`_ sections and `Access to the address space of the process`_ + +7. ioctl operation +------------------ + +For this exercise, we want to add the ioctl ``MY_IOCTL_PRINT`` to display the +message from the ``IOCTL_MESSAGE`` macro in the driver. +Follow the comments marked with ``TODO 6``. + +For this: + + 1. Implement the ioctl function in the driver. + 2. We need to use ``user/so2_cdev_test.c`` to call the + ioctl function with the appropriate parameters. + 3. To test, we will use a user-space program (``user/so2_cdev_test.c``) + which will call the ``ioctl`` function with the required arguments.
+ +.. note:: The macro ``MY_IOCTL_PRINT`` is defined in the file ``include/so2_cdev.h``, + which is shared between the kernel module and the user-space program. + + Read the `ioctl`_ section in the lab. + +.. note:: The user-space code is compiled automatically at ``make build`` and + copied at ``make copy``. + + Because we need to compile the program for the qemu machine, which is 32-bit, + you need to install the ``gcc-multilib`` package if your host is 64-bit. + +Extra Exercises +=============== + +Ioctl with messaging +-------------------- + +Add two ioctl operations to modify the message associated with the +driver. Use a fixed-length buffer ( BUFFER_SIZE ). + + 1. Add the following operations to the ``ioctl`` function of the driver: + + * ``MY_IOCTL_SET_BUFFER`` for writing a message to the device; + * ``MY_IOCTL_GET_BUFFER`` to read a message from your device. + + 2. For testing, pass the required command line arguments to the + user-space program. + +.. note:: Read the `ioctl`_ and `Access to the address space of the process`_ + sections of the lab. + +Ioctl with waiting queues +------------------------- + +Add two ioctl operations to the device driver for queuing. + + 1. Add the following operations to the ``ioctl`` function of the driver: + + * ``MY_IOCTL_DOWN`` to add the process to a queue; + * ``MY_IOCTL_UP`` to remove the process from a queue. + + 2. Fill the device structure with a ``wait_queue_head_t`` field and a flag. + 3. Do not forget to initialize the wait queue and flag. + 4. Remove the exclusive access condition from the previous exercise. + 5. For testing, pass the required command line arguments to the + user-space program. + +When the process is added to the queue, it will remain blocked in execution. To +run the queue command, open a new console in the virtual machine with Alt+F2. +You can return to the previous console with Alt+F1. If you're connected via +SSH to the virtual machine, open a new console. + +..
note:: Read the `ioctl`_ and `Waiting queues`_ sections in the lab. + +O_NONBLOCK implementation +------------------------- + +.. note:: If a file is opened with the ``O_NONBLOCK`` flag, then its + operations will be non-blocking. + + In case data is not available when performing a read, the following + happens: + + * if the file has been opened with ``O_NONBLOCK``, the read call + will return ``-EWOULDBLOCK``. + * otherwise, the current task (process) will be placed in a waiting + queue and will be unblocked as soon as data becomes available + (in our case, at write). + +* To allow unblocking the read operation, remove the exclusive access + condition from previous exercises. +* You can use the queue defined for the previous exercise. +* You can ignore the file offset. +* Modify the initial size of data to ``0``, to allow testing. +* For testing, pass the required command line arguments to the + user-space program. + + * when using the ``n`` option, the test program will change the open flags + to ``O_NONBLOCK`` and then perform a ``read``. + +* What are the flags used to open the file when running ``cat /dev/so2_cdev``? + diff --git a/Documentation/teaching/labs/device_model.rst b/Documentation/teaching/labs/device_model.rst new file mode 100644 index 00000000000000..032990d6c52102 --- /dev/null +++ b/Documentation/teaching/labs/device_model.rst @@ -0,0 +1,1286 @@ +================== +Linux Device Model +================== + +Overview +======== + +Plug and Play is a technology that offers support for automatically adding and +removing devices to the system. This reduces conflicts with the resources they +use by automatically configuring them at system startup. In order to achieve +these goals, the following features are required: + + * Automatic detection of adding and removing devices in the system (the device + and its bus must notify the appropriate driver that a configuration change + occurred).
+ * Resource management (addresses, irq lines, DMA channels, memory areas), + including resource allocation to devices and solving conflicts that may arise. + * Devices must allow for software configuration (device resources - ports, + interrupts, DMA resources - must allow for driver assignment). + * The drivers required for new devices must be loaded automatically by the + operating system when needed. + * When the device and its bus allow, the system should be able to add or + remove the device from the system while it is running, without having to reboot + the system (hotplug). + +For a system to support plug and play, the BIOS, operating system and the device +must support this technology. The device must have an ID that it will provide to the +driver for identification, and the operating system must be able to identify +these configuration changes as they appear. + +Plug and play devices are: PCI devices (network cards), USB (keyboard, mouse, +printer), etc. + +Prior to version 2.6, the kernel did not have a unified model to get +information about devices. +For this reason, a model for Linux devices, the Linux Device Model, was developed. + +The primary purpose of this model is to maintain internal data structures that +reflect the state and structure of the system. Such information includes what +devices are in the system, what their power management state is, what bus +they are attached to, what drivers they have, along with the structure of the +buses, devices and drivers in the system. + +To maintain this information, the kernel uses the following entities: + + * device - a physical device that is attached to a bus + * driver - a software entity that can be associated with a device and performs + operations with it + * bus - a device to which other devices can be attached + * class - a type of device that has a similar behavior; There is a class for + disks, partitions, serial ports, etc.
+ * subsystem - a view on the structure of the system; Kernel subsystems + include devices (hierarchical view of all devices in the system), buses (bus + view of devices according to how they are attached to buses), classes, etc. + +sysfs +===== + +The kernel provides a representation of its model in userspace through the +sysfs virtual file system. It is usually mounted in the /sys directory and +contains the following subdirectories: + + * block - all block devices available in the system (disks, partitions) + * bus - types of bus to which physical devices are connected (pci, ide, usb) + * class - drivers classes that are available in the system (net, sound, usb) + * devices - the hierarchical structure of devices connected to the system + * firmware - information from system firmware (ACPI) + * fs - information about mounted file systems + * kernel - kernel status information (logged-in users, hotplug) + * module - the list of modules currently loaded + * power - information related to the power management subsystem + +As you can see, there is a correlation between the kernel data structures +within the described model and the subdirectories in the sysfs virtual file +system. Although this likeness may lead to confusion between the two concepts, +they are different. The kernel device model can work without the sysfs file +system, but the reciprocal is not true. + +The sysfs information is found in files that contain an attribute. Some +standard attributes (represented by files or directories with the same name) +are as follows: + + * dev - Major and minor device identifier. 
It can be used to automatically + create entries in the /dev directory + * device - a symbolic link to the directory containing devices; It can be + used to discover the hardware devices that provide a particular service (for + example, the ethi PCI card) + * driver - a symbolic link to the driver directory (located in + /sys/bus/\*/drivers ) + +Other attributes are available, depending on the bus and driver used. + +.. ditaa:: + +------+ + | /sys | + +--+---+ + | + +----------------------------------------------------+-------------------------------------+-----------------------------------------+ + | | | | + v v v v + +-----+ +-------+ +---------+ +--------+ + | bus | | class | | devices | | module | + +--+--+ +---+---+ +----+----+ +---+----+ + | | | | + | | | +-------------+-----------------+ + | | | | | + v v v v v + +------------------------+ +-----------------------+ +-------------------------+ +----------------------+ +-------------------------+ + | mybus: struct bus_type | | myclass: struct class | | mybus0: struct device | | mybus: struct module | | mydriver: struct module | + +-------------+----------+ +----------+------------+ +-----------+-------------+ +----------------------+ +-------------------------+ + | | | + +--------+--------------+ v v + | | +-------------------------------+ +----------------------+ + v v | myclass0: struct class_device | | mydev: struct device | + +---------+ +---------+ +-------------------------------+ +----------------------+ + | devices | | drivers | + +---------+ +---+-----+ + | + v + +--------------------------------+ + | mydriver: struct device_driver | + +--------------------------------+ + + +Basic Structures in Linux Devices +================================= + +Linux Device Model provides a number of structures to ensure the interaction +between a hardware device and a device driver. The whole model is based on +kobject structure. 
Hierarchies are built using this structure and the following +structures are implemented: + + * struct bus_type + * struct device + * struct device_driver + + +.. ditaa:: + :--no-separation: + + +--+ +--+ +--+ + mydriver.c | | mybus.c | | bus/driver/device core | | kobject core + | | | | | | + | | | | | | + | | | | | | + | | +-----------------------------+ | | +-----------------------------+ | | + | | | my_bus_type +------=>+ struct bus_type | | | + | | +-----------------------------+ | | +-----------------------------+ | | + | | |name | | | |name | | | + | | |uevent() = my_uevent() | | | |uevent() | | | + | | |match() = my_match() | | | |match() | | | + | | +-----------------------------+ | | +-----------------------------+ | | + | | | | | | | | + | | | | +-----------------------------+ | | + | | | | | | + +----------------+ | | +-----------------------------+ | | +-----------------------------+ | | +-------------------+ + | mydriver +------=>+ struct my_driver +------->+ struct device_driver +-------+---->| struct kobject | + +----------------+ | | +-----------------------------+ | | +-----------------------------+ | | | +-------------------+ + | | | | | | | | | name | | | | | k_name | + +----------------+ | | +-----------------------------+ | | +-----------------------------+ | | | +-------------------+ + | | | my_register_driver() | | | | driver_register() | | | | | kobject_add() | + | | | my_unregister_driver() | | | | driver_unregister() | | | | | kobject_delete() | + | | +-----------------------------+ | | +-----------------------------+ | | | +-------------------+ + | | | | | | | + | | | | | | | + +----------------+ | | +-----------------------------+ | | +-----------------------------+ | | | + | mydevice +------=>+ struct my_device +------->+ struct device +-------+ + +----------------+ | | +-----------------------------+ | | +-----------------------------+ | | + | | | | | | | | | bus_id | | | + +----------------+ | | +-----------------------------+ | | 
+-----------------------------+ | | + | | | my_register_device() | | | | device_register() | | | + | | | my_unregister_device() | | | | device_unregister() | | | + | | +-----------------------------+ | | +-----------------------------+ | | + | | | | | | + +--+ +--+ +--+ + + +The kobject structure +--------------------- + +A kobject structure does not perform a single function. This structure is +usually integrated into a larger one. A kobject structure actually +incorporates a set of features that will be offered to a higher abstraction +object in the Linux Device Model hierarchy. + +For example, the cdev structure has the following definition: + +.. code-block:: c + + struct cdev { + struct kobject kob; + struct module *owner; + const struct file_operations *ops; + struct list_head list; + dev_t dev; + unsigned int count; + }; + + +Note that this structure includes a ``kobject`` structure field. + +A kobject structure is defined as follows: + +.. code-block:: c + + struct kobject { + const char *name; + struct list_head entry; + struct kobject *parent; + struct kset *kset; + struct kobj_type *ktype; + struct sysfs_dirent *sd; + struct kref kref; + unsigned int state_initialized:1; + unsigned int state_in_sysfs:1; + unsigned int state_add_uevent_sent:1; + unsigned int state_remove_uevent_sent:1; + unsigned int uevent_suppress:1; + }; + +As we can see, the kobject structures are in a hierarchy: an object has a +parent and holds a kset member, which contains objects on the same level. + +Working with the structure involves initializing it with the +:c:func:`kobject_init` function. +Also in the initialization process it is necessary to set the name of the +``kobject`` structure, which will appear in sysfs, using the +:c:func:`kobject_set_name` function. + +Any operation on a kobject is done by incrementing its internal counter using +:c:func:`kobject_get`, or decrementing if it is no longer used using +:c:func:`kobject_put`. 
+Thus, a kobject object will only be released when its internal counter reaches 0. +A method of notifying this is needed so that the resources associated with the +device structure which included the kobject structure are released +(for example, cdev). +The method is called ``release`` and is associated with the object via the ktype +field (:c:type:`struct kobj_type`). + +The kobject structure is the basic structure of the Linux Device Model. +The structures in the higher levels of the model are :c:type:`struct bus_type`, +:c:type:`struct device` and :c:type:`struct device_driver`. + +Buses +----- + +A bus is a communication channel between the processor and an input/output +device. To ensure that the model is generic, all input/output devices are +connected to the processor via such a bus (even if it can be a virtual one +without a physical hardware correspondent). + +When adding a system bus, it will appear in the sysfs file system in +``/sys/bus``. +As with kobjects, buses can be organized into hierarchies and will be represented +in sysfs. + +In the Linux Device Model, a bus is represented by the structure +:c:type:`struct bus_type`: + +.. code-block:: c + + struct bus_type { + const char *name; + const char *dev_name; + struct device *dev_root; + struct bus_attribute *bus_attrs; + struct device_attribute *dev_attrs; + struct driver_attribute *drv_attrs; + struct subsys_private *p; + + int (*match)(struct device *dev, struct device_driver *drv); + int (*uevent)(struct device *dev, struct kobj_uevent_env *env); + int (*probe)(struct device *dev); + int (*remove)(struct device *dev); + //... + }; + +It can be noticed that a bus has a name, lists of default attributes, a number +of specific functions, and the driver's private data. +The ``uevent`` function (formerly ``hotplug``) is used with hotplug devices. 
+ +Bus operations are the registration, the implementation of the operations +described in the :c:type:`struct bus_type` structure and the iteration and +inspection of the devices connected to the bus. + +A bus is registered using :c:func:`bus_register`, and unregistered using +:c:func:`bus_unregister`. + +Implementation example: + +.. code-block:: c + + #include <linux/device.h> + /* mybus.c */ + + //bus type + struct bus_type my_bus_type = { + .name = "mybus", + .match = my_match, + .uevent = my_uevent, + }; + + static int __init my_bus_init(void) + { + int err; + + //... + err = bus_register(&my_bus_type); + if (err) + return err; + //... + } + + static void __exit my_bus_exit(void) + { + //... + bus_unregister(&my_bus_type); + //... + } + + +The functions that will normally be initialized within a bus_type structure are +``match`` and ``uevent``: + +.. code-block:: c + + #include <linux/device.h> + #include <linux/string.h> + /* mybus.c */ + + // match devices to drivers; just do a simple name test + static int my_match(struct device *dev, struct device_driver *driver) + { + return !strncmp(dev_name(dev), driver->name, strlen(driver->name)); + } + + // respond to hotplug user events; add environment variable DEV_NAME + static int my_uevent(struct device *dev, struct kobj_uevent_env *env) + { + add_uevent_var(env, "DEV_NAME=%s", dev_name(dev)); + return 0; + } + +The ``match`` function is used when a new device or a new driver is added to the +bus. Its role is to make a comparison between the device ID and the driver ID. +The ``uevent`` function is called before generating a hotplug event in user-space and +has the role of adding environment variables. + +Other possible operations on a bus are iterating over the drivers or devices +attached to it. +Although we cannot directly access them (the lists of drivers and devices +are stored in the private data of the bus, the ``subsys_private *p`` field), +these can be iterated using the :c:func:`bus_for_each_dev` and +:c:func:`bus_for_each_drv` functions.
+ +The Linux Device Model interface allows you to create attributes for the +associated objects. These attributes will have a corresponding file in the +bus subdirectory in sysfs. The attributes associated with a bus are +described by the bus_attribute structure : + +.. code-block:: c + + struct bus_attribute { + struct attribute attr; + ssize_t (*show)(struct bus_type *, char *buf); + ssize_t (*store)(struct bus_type *, const char *buf, size_t count); + }; + +Typically, an attribute is defined by the :c:macro:`BUS_ATTR` macro. +The :c:func:`bus_create_file` and :c:func:`bus_remove_file` functions can be +used to add/delete an attribute within the bus structure. + +An example of defining an attribute for ``my_bus`` is shown below: + +.. code-block:: c + + /* mybus.c */ + + #define MY_BUS_DESCR "SO2 rules forever" + + // export a simple bus attribute + static ssize_t my_show_bus_descr(struct bus_type *bus, char *buf) + { + return snprintf(buf, PAGE_SIZE, "%s\n", MY_BUS_DESCR); + } + + /* + * define attribute - attribute name is descr; + * full name is bus_attr_descr; + * sysfs entry should be /sys/bus/mybus/descr + */ + BUS_ATTR(descr, 0444, my_show_bus_descr, NULL); + + // specify attribute - in module init function + static int __init my_bus_init(void) + { + int err; + //... + err = bus_create_file(&my_bus_type, &bus_attr_descr); + if (err) { + /* handle error */ + } + //... + } + + static void __exit my_bus_exit(void) + { + //... + bus_remove_file(&my_bus_type, &bus_attr_descr); + //... + } + +The bus is represented by both a ``bus_type`` object and a ``device`` object, +as we will see later (the bus is also a device). + + +Devices +------- + +Any device in the system has a :c:type:`struct device` structure associated +with it. +Devices are discovered by different kernel methods (hotplug, device drivers, +system initialization) and are registered in the system. Each device present in +the kernel has an entry in ``/sys/devices``. 
+ +At the lowest level, a device in Linux Device Model is represented by a +:c:type:`struct device` structure: + +.. code-block:: c + + struct device { + //... + struct device *parent; + struct device_private *p; + struct kobject kobj; + + const char *init_name; /* initial name of the device */ + //... + struct bus_type *bus; /* type of bus device is on */ + struct device_driver *driver; /* which driver has allocated this + device */ + //... + void (*release)(struct device *dev); + }; + +Structure fields include the parent device that is usually a controller, the +associated ``kobject``, the bus it is connected to, the device driver, and a +function called when the device counter reaches 0 (``release``). + +As usual, we have the registration/unregistration functions +:c:func:`device_register` and :c:func:`device_unregister`. + +To work with attributes, we have structure :c:type:`struct device_attribute`, +the macro :c:macro:`DEVICE_ATTR` for definition, and the functions +:c:func:`device_create_file` and :c:func:`device_remove_file` for adding/removing +the attribute to/from the device. + +One important thing to note is that the :c:type:`struct device` structure is +usually not used directly, but it is added to another structure. For example: + +.. code-block:: c + + // my device type + struct my_device { + char *name; + struct my_driver *driver; + struct device dev; + }; + +Typically, a bus driver will export functions to add or remove such a +device, as shown below: + +.. 
code-block:: c + + /* mybus.c */ + + /* BUS DEVICE (parent) */ + + // parent device release + static void my_bus_device_release(struct device *dev) + { + } + + // parent device + static struct device my_bus_device = { + .init_name = "mybus0", + .release = my_bus_device_release + }; + + /* DEVICE */ + + /* + * as we are not using the reference count, we use a no-op + * release function + */ + static void my_dev_release(struct device *dev) + { + } + + int my_register_device(struct my_device *mydev) + { + mydev->dev.bus = &my_bus_type; + mydev->dev.parent = &my_bus_device; + mydev->dev.release = my_dev_release; + dev_set_name(&mydev->dev, mydev->name); + + return device_register(&mydev->dev); + } + + void my_unregister_device(struct my_device *mydev) + { + device_unregister(&mydev->dev); + } + + /* export register/unregister device functions */ + EXPORT_SYMBOL(my_register_device); + EXPORT_SYMBOL(my_unregister_device); + +As seen, the functions ``my_register_device`` and ``my_unregister_device``, used +to add/remove a device to/from a bus, are defined in the same file where the +bus is defined. Device structures are not initialized; they will be initialized +when the devices are discovered by the system (by hotplug or direct registration +from driver) and the function ``my_register_device`` will be called to add a +device to the bus. + +To use the bus defined above in the driver implementation, we must define a +structure of type ``my_device``, initialize it and register it using the function +exported by the bus (``my_register_device``). + +.. code-block:: c + + /* mydriver.c */ + + static struct my_device mydev; + char devname[NAME_SIZE]; + //... + + //register + int err; + + sprintf(devname, "mydev0"); + mydev.name = devname; + mydev.driver = &mydriver; + dev_set_drvdata(&mydev.dev, &mydev); + err = my_register_device(&mydev); + if (err < 0) { + /*handle error */ + } + + //.. 
+ + //unregister + my_unregister_device(&mydev); + +Drivers +------- + +Linux Device Model is used to allow simple association between system +devices and drivers. Drivers can export information independent of the physical +device. + +In sysfs, driver information is not associated with a single subdirectory; it can be +found in the directory structure in different places: the loaded module is in +``/sys/module``, in ``/sys/devices`` you can find the driver associated with +each device, in ``/sys/class`` the drivers belonging to a class, in +``/sys/bus`` the drivers associated to each bus. + +A device driver is identified by the structure :c:type:`struct device_driver`: + +.. code-block:: c + + struct device_driver { + const char *name; + struct bus_type *bus; + + struct driver_private *p; + + struct module *owner; + const char *mod_name; /* used for built-in modules */ + + int (*probe) (struct device *dev); + int (*remove) (struct device *dev); + void (*shutdown) (struct device *dev); + int (*suspend) (struct device *dev, pm_message_t state); + int (*resume) (struct device *dev); + }; + +Among the structure fields we find the name of the driver (appears in ``sysfs``), +the bus with which the driver works, and functions called at various times in a +device's operation. + +As before, we have the functions :c:func:`driver_register` and +:c:func:`driver_unregister` to register/unregister a driver. + +To work with attributes, we have the :c:type:`struct driver_attribute` structure, +the macro :c:macro:`DRIVER_ATTR` for definition, and the functions +:c:func:`driver_create_file` and :c:func:`driver_remove_file` for +adding/removing the attribute to/from the driver. + +As with devices, the structure :c:type:`struct device_driver` is usually +incorporated into another structure specific to a particular bus (PCI, USB, etc.): + +..
code-block:: c + + /* mybus.c */ + + // my driver type + struct my_driver { + struct module *module; + struct device_driver driver; + }; + + #define to_my_driver(drv) container_of(drv, struct my_driver, driver); + + int my_register_driver(struct my_driver *driver) + { + int err; + + driver->driver.bus = &my_bus_type; + err= driver_register(&driver->driver); + if (err) + return err; + return 0; + } + + void my_unregister_driver(struct my_driver *driver) + { + driver_unregister(&driver->driver); + } + + /* export register/unregister driver functions */ + EXPORT_SYMBOL(my_register_driver); + EXPORT_SYMBOL(my_unregister_driver); + +Driver registration/unregistration operations are exported for use in +other modules. + +As for devices, the operations for drivers are defined when the bus is +initialized and they are exported to be used by drivers. When implementing a +driver that works with devices attached to the bus, we will call the functions +``my_register_driver`` and ``my_unregister_driver`` to associate with the bus. + +To use the functions (in the driver implementation), we must declare a structure +of type ``my_driver``, initialize it and register using the function exported +by the bus. + +.. code-block:: c + + /* mydriver.c */ + + static struct my_driver mydriver = { + .module = THIS_MODULE, + .driver = { + .name = "mydriver", + }, + }; + //... + + //register + int err; + err = my_register_driver(&mydriver); + if (err < 0) { + /*handle error */ + } + //.. + + //unregister + my_unregister_driver(&mydriver); + + +Classes +------- + +A class is a high-level view of the Linux Device Model, which abstracts +implementation details. For example, there are SCSI drivers and ATA +drivers, but all belong to the class of disks. Classes provide a grouping of +devices based on functionality, not how they are connected or how they work. +Classes have a correspondent in ``/sys/class``. 
+ +There are two main structures that describe the classes: :c:type:`struct class` +and :c:type:`struct device`. +The class structure describes a generic class, while the structure +:c:type:`struct device` describes a class associated with a device. +There are functions for initializing/deinitializing and adding attributes for each +of these, described in ``include/linux/device.h``. + +The advantage of using classes is that the ``udev`` program in userspace, which we +will discuss later, allows the automatic creation of devices in the ``/dev`` +directory based on class information. + +For this reason, we will continue to present a small set of functions that work +with classes to simplify the use of the plug and play mechanism. + +A generic class is described by the :c:type:`struct class` structure: + +.. code-block:: c + + struct class { + const char *name; + struct module *owner; + struct kobject *dev_kobj; + + struct subsys_private *p; + + struct class_attribute *class_attrs; + struct class_device_attribute *class_dev_attrs; + struct device_attribute *dev_attrs; + + int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env); + void (*class_release)(struct class *class); + void (*dev_release)(struct device *dev); + //... + }; + +The :c:func:`class_register` and :c:func:`class_unregister` functions can be +used for initialization/deinitialization. + +.. code-block:: c + + static struct class my_class = { + .name = "myclass", + }; + + static int __init my_init(void) + { + int err; + //... + err = class_register(&my_class); + if (err < 0) { + /* handle error */ + } + //... + } + + static void __exit my_cleanup(void) + { + //... + class_unregister(&my_class); + //... + } + +A class associated with a device is described by the :c:type:`struct device` +structure. +The :c:func:`device_create` and :c:func:`device_destroy` functions can be used +for initialization/deinitialization. 
+The :c:func:`device_create` function initializes the ``device`` structure, +and assigns the generic ``class`` structure and the device received as a +parameter to it; +In addition, it will create an attribute of the class, ``dev``, which contains +the major and minor of the device (``major:minor``). +Thus, the ``udev`` utility in user space can read the necessary data from this attribute +file to create a node in the ``/dev`` directory by calling ``mknod``. + +An example of initialization: + +.. code-block:: c + + struct device* my_classdev; + struct cdev cdev; + struct device dev; + + //init class for device cdev.dev + my_classdev = device_create(&my_class, NULL, cdev.dev, &dev, "myclass0"); + + //destroy class for device cdev.dev + device_destroy(&my_class, cdev.dev); + +When a new device is discovered, a class will be assigned to it and +a node will be created in the ``/dev`` directory. +For the example above, the node ``/dev/myclass0`` will be generated. + +Hotplug +------- + +``Hotplug`` describes the mechanism for adding or removing a device from the +system while it is running without having to reboot the system. + +A hotplug event is a notification from the kernel to the user-space when something +changes in the system configuration. These events are generated when creating +or removing a kobject from the kernel. Since these objects are the basis of the +Linux Device Model, being included in all structures (``struct bus_type``, +``struct device``, ``struct device_driver``, ``struct class``, etc.), a hotplug event +will be generated when any of these structures is created or removed (``uevent``). + +When a device is discovered in the system, an event is generated. Depending on +the point where it resides in Linux Device Model, the functions corresponding +to the event will be called (usually, the ``uevent`` function associated to the +bus or the class). Using these functions, the driver has the ability to set +system variables for the user-space. 
+The generated event then reaches the user-space. There, the ``udev`` +utility captures these events. There are configuration files for this +utility in the ``/etc/udev/`` directory. Different rules can be specified to +capture only certain events and perform certain actions, depending on the +system variables set in the kernel or in ``uevent`` functions. + +An important consequence is that in this way the plug and play mechanism can be +achieved; with the help of ``udev`` and the classes (described above), entries +in the ``/dev/`` directory can be automatically created for devices, and using +``udev`` drivers can be automatically loaded for a device. + +Rules for ``udev`` are located in ``/etc/udev/rules.d``. +Any file that ends with ``.rules`` in this directory will be parsed when an +event occurs. For more details on how to write rules in these files see +`Writing udev rules `_. +For testing, there are utilities such as ``udevmonitor``, ``udevinfo`` and +``udevtest``. + +For a quick example, consider the situation where we want to automatically load +a driver for a device when an event occurs. We can create a new file, +``/etc/udev/rules.d/myrules.rules``, containing the following line: + +.. code-block:: bash + + SUBSYSTEM=="pnp", ATTRS{id}=="PNP0400", RUN+="/sbin/insmod /root/mydriver.ko" + +This will choose from the events generated only those belonging to the ``pnp`` +subsystem (connected to ``PNP`` bus) and having an id attribute with the value +``PNP0400``. + +When this rule matches, the command specified under ``RUN`` will be +executed to insert the appropriate driver in the kernel. + + +Plug and Play +============= + +As noted above, in Linux Device Model all devices are connected by a bus, whether +it has corresponding physical hardware or is virtual. + +The kernel already has implemented most buses using a ``bus_type`` structure +and functions to register/unregister drivers and devices. 
+To implement a driver, we must first determine the bus to which the supported +devices are connected and use the structures and functions exported by this bus. +The main buses are ``PCI``, ``USB``, ``PNP``, ``IDE``, ``SCSI``, ``platform``, +``ACPI``, etc. + +PNP bus +------- + +The plug and play mechanism provides a means of detecting and setting the resources +for legacy devices or devices that cannot be configured otherwise. All plug and play +drivers, protocols and services are based on the Plug and Play layer. It is responsible +for the exchange of information between drivers and protocols. The following +protocols are available: + + * ``PNPBIOS`` - used for systems such as serial and parallel ports + * ``ISAPNP`` - offers support for the ISA bus + * ``ACPI`` - offering, among other things, information about system-level devices + +The kernel contains a bus, called ``pnp_bus``, that is used by +many drivers. +The implementation of the bus and the way of working with it follow the Linux Device Model and +are very similar to what we discussed above. + +The main functions and structures exported by the bus, which can be used by +drivers, are: + + * :c:type:`struct pnp_driver` - driver type associated to the bus + * :c:func:`pnp_register_driver` - function used to register a PNP driver in the system + * :c:func:`pnp_unregister_driver` - function used to unregister a PNP driver from the system + +As noted in previous sections, the bus has a function called ``match`` used to +associate the devices with the appropriate drivers. +For example, when discovering a new device, the bus searches for a driver which meets the condition +given by the ``match`` function for the new device. Usually, this +condition is a comparison of IDs (driver id and device id). +A common approach is using a static table in each driver, which holds information +about the devices supported by the driver, which will be used by the bus +when verifying the condition. 
For example, for a parallel port device we have +the table ``parport_pc_pnp_tbl``: + +.. code-block:: c + + static const struct pnp_device_id parport_pc_pnp_tbl[] = { + /* Standard LPT Printer Port */ + {.id = "PNP0400", .driver_data = 0}, + /* ECP Printer Port */ + {.id = "PNP0401", .driver_data = 0}, + }; + + MODULE_DEVICE_TABLE(pnp, parport_pc_pnp_tbl); + +Each driver declares and initializes a structure ``pnp_driver``, such as +``parport_pc_pnp_driver``: + +.. code-block:: c + + static int parport_pc_pnp_probe(struct pnp_dev *dev, const struct pnp_id *card_id, + const struct pnp_id *dev_id); + static void parport_pc_pnp_remove(struct pnp_dev* dev); + + static struct pnp_driver parport_pc_pnp_driver = { + .name = "parport_pc", + .id_table = parport_pc_pnp_tbl, + .probe = parport_pc_pnp_probe, + .remove = parport_pc_pnp_remove, + }; + +We can notice that the structure has as fields a pointer to the table declared +above and two functions, which are called when a new device is detected and when +it is removed from the system. +As with all the structures presented above, the driver must be registered with the +system: + +.. code-block:: c + + static int __init parport_pc_init(void) + { + err = pnp_register_driver(&parport_pc_pnp_driver); + if (err < 0) { + /* handle error */ + } + } + + static void __exit parport_pc_exit(void) + { + pnp_unregister_driver(&parport_pc_pnp_driver); + } + +PNP operations +-------------- + +So far we have discussed the Linux Device Model and its API. To +implement a plug and play driver, we must follow the Linux Device Model. + +Most often, adding a bus in the kernel is not necessary, as most of the existing +buses are already implemented (PCI, USB, etc.). Thus, we must first identify the +bus to which the device is attached. +In the examples below, we will consider that this bus is the ``PNP`` bus and we will +use the structures and functions described above. + +.. 
ditaa:: + + + Kernel space | User space + | + | + +-------------+ +-------------+ +---------------+ | +--------+ + | | | | | | | | | + | my_device | | my_driver | | my_bus_type | | | udev | + | | | | | | | | | + +-----+-------+ +------+------+ +-------+-------+ | +---+----+ + | | | | | + : : : | : + | | 1.my_register_driver() | 2.call_usermodehelper() | + | +-+------------------------->+-+------------------------->+-+ + | | | | | | | | + | | | | | | | | + | | | | | | | | + | 3.my_uevent() | | | | 4.call_usermodehelper() | | + +++-------------------------| |--------------------------> +------------------------->| | + | | | | | | | | | + | | | | 6.my_probe() | | 5.my_match() | | + | | | |<=------------------------| |<=------------------------| | + | | | | | | | | | + | | | | | | | | | + | | | | | | | | | + | | 7.my_remove() | | 8.my_uevent() | | 9.call_usermodehelper() | | +---------------------------+ + +-+------------------------>| |------------------------->| |------------------------->| | | | + | | | | | | | | | 1 - 2 -> add driver | + | | | | | | | | | 3 - 6 -> add device | + | | | | | | | | | 7 - 9 -> remove device | + | | | 10.my_unregister_driver()| | 11.call_usermodehelper() | | | 10 - 11 -> remove driver | + | +-+------------------------->+-+------------------------->+-+ | | + | | | | | +---------------------------+ + : : : | : + + +Adding a driver +--------------- + +In addition to the usual operations, a driver must follow the Linux Device Model. +Thus, it will be registered in the system using the functions provided by +the bus for this purpose. +Usually, the bus provides a particular driver structure containing a +:c:type:`struct device_driver` structure, that the driver must initialize and +register using a function ``*_register_driver``. +For example, for the ``PNP`` bus, the driver must declare and initialize a +structure of type :c:type:`struct pnp_driver` and register it using +``pnp_register_drvier``: + +.. 
code-block:: c + + static struct pnp_driver my_pnp_driver = { + .name = "mydriver", + .id_table = my_pnp_tbl, + .probe = my_pnp_probe, + .remove = my_pnp_remove, + }; + + static int __init my_init(void) + { + err = pnp_register_driver(&my_pnp_driver); + } + +Unlike legacy drivers, plug and play drivers don't register devices at +initialization in the init function (``my_init`` in the example above) using +:c:func:`register_device`. + +As described above, each bus has a ``match`` function which is called when a new +device is detected in the system to determine the associated driver. +Thus, there must be a way for each driver to export information about the +devices it supports, to allow this check to pass and have its functions further +called. +In the examples presented in this lab, the match function does a simple +comparison between the device name and the driver name. Most drivers use a table +containing information about the supported devices and store a pointer to this table in the +driver structure. +For example, a driver associated to a ``PNP`` bus defines a table of type +:c:type:`struct pnp_device_id` and initializes the field ``id_table`` from the +structure ``pnp_driver my_pnp_driver`` with a pointer to it: + +.. code-block:: c + + static const struct pnp_device_id my_pnp_tbl[] = { + /* Standard LPT Printer Port */ + {.id = "PNP0400", .driver_data = 0}, + /* ECP Printer Port */ + {.id = "PNP0401", .driver_data = 0}, + { } + }; + + MODULE_DEVICE_TABLE(pnp,my_pnp_tbl); + + static struct pnp_driver my_pnp_driver = { + //... + .id_table = my_pnp_tbl, + //... + }; + +In the example above, the driver supports multiple parallel port devices, +defined in the table ``my_pnp_tbl``. This information is used by the bus in +the ``match_device`` function. +When adding a driver, the bus driver will be associated to it and new entries +in ``sysfs`` will be created based on the driver name. 
+Then the bus ``match`` function will be called for every supported device, +to associate the driver with any connected device that it supports. + +Removing a driver +----------------- + +To remove a driver from the kernel, in addition to operations required for a +legacy driver, we must unregister the ``device_driver`` structure. +For a driver associated with the ``PNP`` bus, we must unregister the ``pnp_driver`` +structure using the :c:func:`pnp_unregister_driver` function: + +.. code-block:: c + + static struct pnp_driver my_pnp_driver; + + static void __exit my_exit(void) + { + pnp_unregister_driver(&my_pnp_driver); + } + +Unlike legacy drivers, plug and play drivers don't unregister devices in the +module unload function (``my_exit``). When a driver is removed, all the +references to it will be removed for all the devices it supports, and entries +from ``sysfs`` will also be removed. + +Adding a new device +------------------- + +As we saw above, plug and play drivers do not register devices at initialization. +This operation will take place in the ``probe`` function, which is called when +a new device is detected. A device attached to the ``PNP`` bus will be added to +the system by the function ``probe`` from the ``pnp_driver`` structure: + +.. code-block:: c + + static int my_pnp_probe(struct pnp_dev *dev, const struct pnp_id *card_id, + const struct pnp_id *dev_id) { + int err, iobase, nr_ports, irq; + + //get irq & ports + if (pnp_irq_valid(dev, 0)) + irq = pnp_irq(dev, 0); + if (pnp_port_valid(dev, 0)) { + iobase = pnp_port_start(dev, 0); + } else + return -ENODEV; + nr_ports = pnp_port_len(dev, 0); + + /* register device dev */ + } + + static struct pnp_driver my_pnp_driver = { + //... + .probe = my_pnp_probe, + //... + }; + +Upon detection of a device in the kernel (at boot or by the insertion of the +device through ``hotplug``), an interrupt is generated and reaches the bus +driver. 
+The device is registered using the function :c:func:`device_register` and it is +attached to the bus. A call to the user space will also be generated, and the +event can be treated by ``udev``. Then, the list of drivers associated with the +bus is iterated and the ``match`` function is called for each of them. +The ``match`` function tries to find a driver for the new device. After a +suitable driver is found for the device, the ``probe`` function of the driver +is called. If the function ends successfully, the device is added to the driver's +list of devices and new entries are created in ``sysfs`` based on the device name. + +Removing a device +----------------- + +As we saw above, the plug and play drivers don't unregister devices when the +driver is unloaded. This operation is done in the ``remove`` function, which +is called when a device is removed from the system. +In case of a device attached to the ``PNP`` bus, the unregister will be done +in the ``remove`` function specified in the ``pnp_driver`` structure: + +.. code-block:: c + + static void my_pnp_remove(struct pnp_dev *dev) { + /* unregister device dev */ + } + + static struct pnp_driver my_pnp_driver = { + //... + .remove = my_pnp_remove, + }; + +As seen in the example above, when the removal of a device is detected, the +``my_pnp_remove`` function is called. A user-space call is also generated, which +can be detected by ``udev``, and entries are removed from ``sysfs``. + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: device_model + +0. Intro +--------- + +Find the definitions of the following symbols in the Linux kernel: + + * functions ``dev_name``, ``dev_set_name``. + * functions ``pnp_device_probe``, ``pnp_bus_match``, ``pnp_register_driver`` + and the ``pnp_bus_type`` variable. + +1. Bus implementation +--------------------- + +Analyze the contents of the ``bex.c``, a module that implements a bus +driver. 
Follow the comments marked with **TODO 1** and implement the missing +functionality: register the bus driver and add a new device named ``root`` +with type ``none`` and version 1. + +.. hint:: See :c:func:`bex_add_dev`. + +.. hint:: The register and unregister must be done using :c:func:`bus_register` + and :c:func:`bus_unregister`. + +Load the module and verify that the bus is visible in ``/sys/bus``. Verify +that the device is visible in ``/sys/bus/bex/devices``. + +Remove the module and notice that the ``sysfs`` entries are removed. + +2. Add type and version device attributes +----------------------------------------- + +Add two read-only device attributes, ``type`` and ``version``. Follow the +**TODO 2** markings. + +.. hint:: You will need to add the two attributes in the structure + ``bex_dev_attrs``, as follows: + + ``&dev_attr_.attr,`` + +.. hint:: + + A possible implementation for the show function is the following: + + .. code-block:: c + + static ssize_t + type_show(struct device *dev, struct device_attribute *attr, char *buf) + { + struct bex_device *bex_dev = to_bex_device(dev); + + return sprintf(buf, "%s\n", bex_dev->type); + } + DEVICE_ATTR_RO(type); + +Observe that two new attributes are visible in +/sys/bus/bex/devices/root. Check the contents of these attributes. + +3. Add del and add bus attributes +--------------------------------- + +Add two write-only bus attributes, ``del`` and ``add``. del expects the name +of a device to delete, while add expects the name, type and version to +create a new device. Follow the **TODO 3** markings and review +`Buses`_. + +.. hint:: Use :c:func:`sscanf` to parse the input from sysfs and + :c:func:`bex_del_dev` and :c:func:`bex_add_dev` to delete + and create a new device. + +An example for the store function is the following: + +.. 
code-block:: c + + static ssize_t add_store(struct bus_type *bt, const char *buf, size_t count) + { + char name[32]; + int ret; + + ret = sscanf(buf, "%31s", name); + if (ret != 1) + return -EINVAL; + + ... + } + BUS_ATTR(add, S_IWUSR, NULL, add_store); + +.. hint:: The store functions should return ``0`` if + ``bex_add_dev``/``bex_del_dev`` fail and ``count`` otherwise. + +Create a new device and observe that it is visible in +``/sys/bus/bex/devices``. Delete it and observe it disappears from ``sysfs``. + +.. hint:: Use echo to write into the bus attributes: + + .. code-block:: shell + + $ echo "name type 1" > /sys/bus/bex/add + $ echo "name" > /sys/bus/bex/del + +4. Register the bex misc driver +------------------------------- + +Modify **bex-misc.c** so that it registers the driver with the bex +bus. Insert the ``bex_misc.ko`` module and create a new bex device from +sysfs with the name "test", type "misc", version 2. Follow the **TODO +4** markings. + +Observe that the driver is visible in ``/sys/bus/bex/drivers``. + +Why isn't the probe function called? + +.. hint:: Notice that the bus match function in **bex.c** is not + implemented. + +Implement the bus matching function in **bex.c**. Follow the **TODO 5** +markings. Try again to create a new bex device and observe that this +time the ``probe`` function from the ``bex_misc`` driver is called. + +5. Register misc device in the bex_misc probe function +------------------------------------------------------ + +Modify **bex_misc.c** to refuse probing if ``version > 1``. Also, register the +defined misc device in ``bex_misc_probe`` and deregister it in +``bex_misc_remove``. Follow the **TODO 6** markings. + +.. hint:: Use :c:func:`misc_register` and :c:func:`misc_deregister`. + +Create a new device with the name "test", type "misc" and version 2 +and observe that the probe fails. Create a new device with the name +"test2", type "misc" and version 1 and observe that the probe is +successful. 
+ +Inspect ``/sys/bus/bex/devices/test2`` and observe that we have a new +entry. Identify the major and minor for the misc device, create a +character device file and try to read and write from the misc device +buffer. + +.. hint:: The major and minor should be visible in the dev attribute + of the misc device + +6. Monitor uevent notifications +------------------------------- + +Use the ``udevadm monitor`` command and observe what happens when: + +* the ``bex.ko`` and ``bex_misc.ko`` modules are inserted + +* a new device with the type "type" is created + +* a new device with the type "misc" and version 2 is created + +* a new device with the type "misc" and version 1 is created + +* all of the above are removed diff --git a/Documentation/teaching/labs/exercises-summary.hrst b/Documentation/teaching/labs/exercises-summary.hrst new file mode 100644 index 00000000000000..92526245db1503 --- /dev/null +++ b/Documentation/teaching/labs/exercises-summary.hrst @@ -0,0 +1,55 @@ +.. important:: + + We strongly encourage you to use the setup from `this repository `__. + + To solve exercises, you need to perform these steps: + * prepare skeletons from templates + * build modules + * start the VM and test the module in the VM. + + The current lab name is |LAB_NAME|. See the exercises for the task name. + + The skeleton code is generated from full source examples located in + :file:`tools/labs/templates`. To solve the tasks, start by generating + the skeleton code for a complete lab: + + .. code-block:: shell + + tools/labs $ make clean + tools/labs $ LABS= make skels + + You can also generate the skeleton for a single task, using + + .. code-block:: shell + + tools/labs $ LABS=/ make skels + + Once the skeleton drivers are generated, build the source: + + .. code-block:: shell + + tools/labs $ make build + + Then, start the VM: + + .. code-block:: shell + + tools/labs $ make console + + The modules are placed in /home/root/skels/|LAB_NAME|/. 
+ + You DO NOT need to STOP the VM when rebuilding modules! + The local `skels` directory is shared with the VM. + + Review the `Exercises`_ section for more detailed information. + +.. warning:: + + Before starting the exercises or generating the skeletons, please run **git pull** inside the Linux repo, + to make sure you have the latest version of the exercises. + + If you have local changes, the pull command will fail. Check for local changes using ``git status``. + If you want to keep them, run ``git stash`` before ``pull`` and ``git stash pop`` after. + To discard the changes, run ``git reset --hard master``. + + If you already generated the skeleton before ``git pull`` you will need to generate it again. \ No newline at end of file diff --git a/Documentation/teaching/labs/filesystems_part1.rst b/Documentation/teaching/labs/filesystems_part1.rst new file mode 100644 index 00000000000000..0fd3efa2bdb836 --- /dev/null +++ b/Documentation/teaching/labs/filesystems_part1.rst @@ -0,0 +1,796 @@ +============================ +File system drivers (Part 1) +============================ + +Lab objectives +============== + + * acquiring knowledge about the Virtual Filesystem (VFS) in Linux and understanding concepts regarding 'inode', 'dentry', 'file', superblock and data block. + * understanding the process of mounting a file system inside VFS. + * knowledge regarding various file system types and understanding differences between file systems with physical support (on disk) and the ones without physical support. + +Virtual Filesystem (VFS) +======================== + +The Virtual Filesystem (also known as VFS) is a component of the kernel that handles all system calls related to files and file systems. +VFS is a generic interface between the user and a particular file system. +This abstraction simplifies the implementation of file systems and provides an easier integration of multiple file systems. 
This way, the implementation of a file system is accomplished by using the API provided by the VFS, and the generic hardware and I/O subsystem communication parts are handled by VFS. + +From a functional point of view, file systems can be grouped into: + + * disk file systems (ext3, ext4, xfs, fat, ntfs, etc.) + * network file systems (nfs, smbfs/cifs, ncp, etc.) + * virtual filesystems (procfs, sysfs, sockfs, pipefs, etc.) + +A Linux kernel instance will use VFS for the hierarchy (a tree) of directories and files. +A new file system will be added as a VFS subtree using the mount operation. +A file system is usually mounted from the environment for which it was built (from a block type device, from network, etc.). +In particular, however, the VFS can use a normal file as a virtual block device, so it is possible to mount disk file systems over normal files. This way, stacks of file systems can be created. + +The basic idea of VFS is to provide a single file model that can represent files from any file system. +The file system driver is responsible for bringing to the common denominator. +This way the kernel can create a single directory structure that contains the entire system. +There will be a file system that will be the root, the rest being mounted in its various directories. + +The general file system model +============================= + +The general file system model, to which any implemented file system needs to be reduced, consists of several well-defined entities: :c:type:`superblock`, :c:type:`inode`, :c:type:`file`, and :c:type:`dentry`. +These entities are file system metadata (they contain information about data or other metadata). + +Model entities interact using some VFS or kernel subsystems: dentry cache, inode cache, buffer cache. +Each entity is treated as an object: it has a associated data structure and a pointer to a table of methods. The induction of particular behavior for each component is done by replacing the associated methods. 
+ +superblock +---------- + +The superblock stores the information needed for a mounted file system: + + * inode and blocks locations + * file system block size + * maximum filename length + * maximum file size + * the location of the root inode + +Localization: +~~~~~~~~~~~~~ + + * In the case of disk file systems, the superblock has a correspondent in the first block of the disk. (Filesystem Control Block). + * In VFS, all superblocks of filesystems are retained in a list of structures of type :c:type:`struct super_block` and the methods in structures of type :c:type:`struct super_operations`. + +inode +----- + +The inode (index node) keeps information about a file in the general sense (abstraction): regular file, directory, special file (pipe, fifo), block device, character device, link, or anything that can be abstracted as a file. + +An inode stores information like: + + * file type; + * file size; + * access rights; + * access or modify time; + * location of data on the disk (pointers to disk blocks containing data). + +.. note:: + Usually, the inode does not contain the file name. The name is stored by the :c:type:`dentry` entity. This way, an inode can have multiple names (hardlinks). + +Localization: +~~~~~~~~~~~~~ + +Like the superblock, the :c:type:`inode` has a disk correspondent. +The inodes on disk are generally grouped into a specialized area (inode area) separated from the data blocks area; In some file systems, the equivalents of the inodes are spread in the file system structure (FAT); +As a VFS entity, an inode is represented by the structure :c:type:`struct inode` and by the operations with it defined in the structure :c:type:`struct inode_operations`. + +Each inode is generally identified by a number. On Linux, the ``-i`` argument of the ``ls`` command shows the inode number associated with each file: + +.. 
code-block:: console + + razvan@valhalla:~/school/so2/wiki$ ls -i + 1277956 lab10.wiki 1277962 lab9.wikibak 1277964 replace_lxr.sh + 1277954 lab9.wiki 1277958 link.txt 1277955 homework.wiki + +file +---- + +File is the component of the file system model that is closest to the user. +The structure exists only as a VFS entity in memory and has no physical correspondent on disk. + +While the inode abstracts a file on the disk, the file structure abstracts an open file. +From the point of view of the process, the file entity abstracts the file. From the point of view of the file system implementation, however, the inode is the entity that abstracts the file. + +The file structure maintains information such as: + + * file cursor position; + * file opening rights; + * pointer to the associated inode (eventually its index). + +Localization: +~~~~~~~~~~~~~ + + * The structure :c:type:`struct file` is the associated VFS entity, and the structure :c:type:`struct file_operations` represents the operations associated with it. + +dentry +------ + +The dentry (directory entry) associates an inode with a file name. + +Generally, a dentry structure contains two fields: + + * an integer that identifies the inode; + * a string representing its name. + +The dentry is a specific part of a path that can be a directory or a file. For example, for the path ``/bin/vi``, dentry objects will be created for ``/``, ``bin``, and ``vi`` (a total of 3 dentry objects). + + * the dentry has a correspondent on the disk, but the correspondence is not direct because each file system keeps the dentries in a specific way + * in VFS, the dentry entity is represented by the structure :c:type:`struct dentry` and the operations with it are defined in the :c:type:`struct dentry_operations` structure. + +.. 
_RegisterUnregisterSection: + +Register and unregister filesystems +=================================== + +In the current version, the Linux kernel supports about 50 file systems, including: + + * ext2/ext4 + * reiserfs + * xfs + * fat + * ntfs + * iso9660 + * udf for CDs and DVDs + * hpfs + +On a single system, however, it is unlikely that there will be more than 5-6 file systems. For this reason, file systems (or, more correctly, file system types) are implemented as modules and can be loaded or unloaded at any time. + +In order to be able to dynamically load / unload a file system module, a file system registration / deregistration API is required. The structure describing a particular file system is :c:type:`struct file_system_type`: + + .. code-block:: c + + #include <linux/fs.h> + + struct file_system_type { + const char *name; + int fs_flags; + struct dentry *(*mount) (struct file_system_type *, int, + const char *, void *); + void (*kill_sb) (struct super_block *); + struct module *owner; + struct file_system_type * next; + struct hlist_head fs_supers; + struct lock_class_key s_lock_key; + struct lock_class_key s_umount_key; + //... + }; + + * ``name`` is a string representing the name that will identify a file system (the argument passed to ``mount -t``). + * ``owner`` is ``THIS_MODULE`` for file systems implemented in modules, and ``NULL`` if they are written directly into the kernel. + * The ``mount`` function reads the superblock from the disk into memory when loading the file system. The function is unique to each file system. + * The ``kill_sb`` function releases the super-block from memory. + * ``fs_flags`` specifies the flags with which the file system must be mounted. An example of such a flag is ``FS_REQUIRES_DEV`` that specifies to VFS that the file system needs a disk (it is not a virtual file system). + * ``fs_supers`` is a list containing all the superblocks associated with this file system.
Since the same file system can be mounted multiple times, there will be a separate superblock for each mount. + +The *registration of a file system* into the kernel is generally performed in the module initialization function. For registration, the programmer will have to + + #. initialize a structure of type :c:type:`struct file_system_type` with the name, the flags, the function that implements the superblock reading operation and the reference to the structure that identifies the current module + #. call the :c:func:`register_filesystem` function. + +When unloading the module, you must unregister the file system by calling the :c:func:`unregister_filesystem` function. + +An example of registering a virtual file system is found in the code for ``ramfs``: + +.. code-block:: c + + static struct file_system_type ramfs_fs_type = { + .name = "ramfs", + .mount = ramfs_mount, + .kill_sb = ramfs_kill_sb, + .fs_flags = FS_USERNS_MOUNT, + }; + + static int __init init_ramfs_fs(void) + { + if (test_and_set_bit(0, &once)) + return 0; + return register_filesystem(&ramfs_fs_type); + } + +.. _FunctionsMountKillSBSection: + +Functions mount, kill_sb +------------------------ + +When mounting the file system, the kernel calls the mount function defined within the structure :c:type:`file_system_type`. The function makes a set of initializations and returns a dentry (the structure :c:type:`struct dentry`) that represents the mount point directory. 
Usually :c:func:`mount` is a simple function that calls one of the functions: + + * :c:func:`mount_bdev`, which mounts a file system stored on a block device + * :c:func:`mount_single`, which mounts a file system that shares an instance between all mount operations + * :c:func:`mount_nodev`, which mounts a file system that is not on a physical device + * :c:func:`mount_pseudo`, a helper function for pseudo-file systems (``sockfs``, ``pipefs``, generally file systems that can not be mounted) + +These functions get as parameter a pointer to a function :c:func:`fill_super` that will be called after the superblock initialization to finish its initialization by the driver. An example of such a function can be found in the ``fill_super`` section. + +When unmounting the file system, the kernel calls :c:func:`kill_sb`, which performs cleanup operations and invokes one of the functions: + + * :c:func:`kill_block_super`, which unmounts a file system on a block device + * :c:func:`kill_anon_super`, which unmounts a virtual file system (information is generated when requested) + * :c:func:`kill_litter_super`, which unmounts a file system that is not on a physical device (the information is kept in memory) + +An example for a file system without disk support is the :c:func:`ramfs_mount` function in the ``ramfs`` file system: + +.. code-block:: c + + struct dentry *ramfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) + { + return mount_nodev(fs_type, flags, data, ramfs_fill_super); + } + +An example for a file system from disk is the :c:func:`minix_mount` function in the ``minix`` file system: + +.. 
code-block:: c + + struct dentry *minix_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) + { + return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super); + } + +Superblock in VFS +================= + +The superblock exists both as a physical entity (entity on disk) and as a VFS entity (within the :c:type:`struct super_block` structure). +The superblock contains only metainformation and is used to write and read metadata from the disk (inodes, directory entries). +A superblock (and implicitly the :c:type:`struct super_block` structure) will contain information about the block device used, the list of inodes, a pointer to the inode of the file system root directory, and a pointer to the superblock operations. + +The :c:type:`struct super_block` structure +------------------------------------------ + +Part of the :c:type:`struct super_block` structure definition is presented below: + +.. code-block:: c + + struct super_block { + //... + dev_t s_dev; /* identifier */ + unsigned char s_blocksize_bits; /* block size in bits */ + unsigned long s_blocksize; /* block size in bytes */ + unsigned char s_dirt; /* dirty flag */ + loff_t s_maxbytes; /* max file size */ + struct file_system_type *s_type; /* filesystem type */ + struct super_operations *s_op; /* superblock methods */ + //... + unsigned long s_flags; /* mount flags */ + unsigned long s_magic; /* filesystem’s magic number */ + struct dentry *s_root; /* directory mount point */ + //... + char s_id[32]; /* informational name */ + void *s_fs_info; /* filesystem private info */ + }; + +The superblock stores global information for an instance of a file system: + * the physical device on which it resides + * block size + * the maximum size of a file + * file system type + * the operations it supports + * magic number (identifies the file system) + * the root directory ``dentry`` + +Additionally, a generic pointer (``void *``) stores the private data of the file system. 
+The superblock can be viewed as an abstract object to which its own data is added when there is a concrete implementation. + +.. _SuperblockSection: + +Superblock operations +--------------------- + +The superblock operations are described by the :c:type:`struct super_operations` structure: + +.. code-block:: c + + struct super_operations { + //... + int (*write_inode) (struct inode *, struct writeback_control *wbc); + struct inode *(*alloc_inode)(struct super_block *sb); + void (*destroy_inode)(struct inode *); + + void (*put_super) (struct super_block *); + int (*statfs) (struct dentry *, struct kstatfs *); + int (*remount_fs) (struct super_block *, int *, char *); + //... + }; + +The fields of the structure are function pointers with the following meanings: + + * ``write_inode``, ``alloc_inode``, ``destroy_inode`` write, allocate, respectively release resources associated with an inode and are described in the next lab + * ``put_super`` is called when the superblock is released at ``umount``; within this function, any resources (generally memory) from the file system's private data must be released; + * ``remount_fs`` is called when the kernel detects a remount attempt (mount flag ``MS_REMOUNT``); most of the time, what must be detected here is whether a switch from read-only to read-write or vice versa is attempted; this can be done simply because both the old flags (in ``sb->s_flags``) and the new flags (the ``flags`` argument) can be accessed; ``data`` is a pointer to the data sent by :c:func:`mount` that represents file system specific options; + * ``statfs`` is called when a ``statfs`` system call is done (try ``stat -f`` or ``df``); this call must fill the fields of the :c:type:`struct kstatfs` structure, as it is done, for example, in the :c:func:`ext4_statfs` function. + +..
_FillSuperSection: + +The :c:func:`fill_super` function +===================================== + +As specified, the :c:func:`fill_super` function is called to finish the superblock initialization. This initialization involves filling the :c:type:`struct super_block` structure fields and the initialization of the root directory inode. + +An example of implementation is the :c:func:`ramfs_fill_super` function which is called to initialize the remaining fields in the superblock: + +.. code-block:: c + + #include <linux/pagemap.h> + + #define RAMFS_MAGIC 0x858458f6 + + static const struct super_operations ramfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .show_options = ramfs_show_options, + }; + + static int ramfs_fill_super(struct super_block *sb, void *data, int silent) + { + struct ramfs_fs_info *fsi; + struct inode *inode; + int err; + + save_mount_options(sb, data); + + fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); + sb->s_fs_info = fsi; + if (!fsi) + return -ENOMEM; + + err = ramfs_parse_options(data, &fsi->mount_opts); + if (err) + return err; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_magic = RAMFS_MAGIC; + sb->s_op = &ramfs_ops; + sb->s_time_gran = 1; + + inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0); + sb->s_root = d_make_root(inode); + if (!sb->s_root) + return -ENOMEM; + + return 0; + } + + +The kernel provides generic functions to implement operations with file system structures. +The :c:func:`generic_delete_inode` and :c:func:`simple_statfs` functions used in the above code are such functions and can be used to implement the drivers if their functionality is sufficient. + +The :c:func:`ramfs_fill_super` function in the above code fills some fields in the superblock, then reads the root inode and allocates the root dentry.
+Reading the root inode is done in the :c:func:`ramfs_get_inode` function, and consists of allocating a new inode using :c:func:`new_inode` and initializing it. In order to free the inode, :c:func:`iput` is used, and :c:func:`d_make_root` is used to allocate the root dentry. + +An example implementation for a disk file system is the :c:func:`minix_fill_super` function in the minix file system. +The functionality for the disk file system is similar to that of the virtual file system, with the exception of using the buffer cache. +Also, the minix file system keeps private data using the :c:type:`struct minix_sb_info` structure. +A large part of this function deals with the initialization of these private data. +The private data is allocated using the :c:func:`kzalloc` function and stored in the ``s_fs_info`` field of the superblock structure. + +VFS functions typically get as arguments the superblock, an inode and/or a dentry that contain a pointer to the superblock so that these private data can be easily accessed. + +.. _BufferCacheSection: + +Buffer cache +============ + +Buffer cache is a kernel subsystem that handles caching (both read and write) blocks from block devices. +The base entity used by buffer cache is the :c:type:`struct buffer_head` structure. 
+The most important fields in this structure are: + + * ``b_data``, pointer to the memory area into which the data was read or from which the data must be written + * ``b_size``, buffer size + * ``b_bdev``, the block device + * ``b_blocknr``, the number of the block on the device that has been loaded or needs to be saved on the disk + * ``b_state``, the status of the buffer + +There are some important functions that work with these structures: + + * :c:func:`__bread`: reads a block with the given number and given size in a ``buffer_head`` structure; in case of success returns a pointer to the ``buffer_head`` structure, otherwise it returns ``NULL``; + * :c:func:`sb_bread`: does the same thing as the previous function, but the size of the read block is taken from the superblock, as well as the device from which the read is done; + * :c:func:`mark_buffer_dirty`: marks the buffer as dirty (sets the ``BH_Dirty`` bit); the buffer will be written to the disk at a later time (from time to time the ``bdflush`` kernel thread wakes up and writes the buffers to disk); + * :c:func:`brelse`: frees up the memory used by the buffer, after it has previously written the buffer on disk if needed; + * :c:func:`map_bh`: associates the buffer-head with the corresponding sector. + +Functions and useful macros +=========================== + +The super block typically contains a map of occupied blocks (by inodes, dentries, data) in the form of a bitmap (vector of bits). To work with such maps, it is recommended to use the following functions: + + * :c:func:`find_first_zero_bit`, to find the first zero bit in a memory area. The size parameter means the number of bits in the search area; + * :c:func:`test_and_set_bit`, to set a bit and get the old value; + * :c:func:`test_and_clear_bit`, to clear a bit and get the old value; + * :c:func:`test_and_change_bit`, to invert the value of a bit and get the old value.
+ +The following macrodefinitions can be used to verify the type of an inode: + + * ``S_ISDIR`` (``inode->i_mode``) to check if the inode is a directory; + * ``S_ISREG`` (``inode->i_mode``) to check if the inode is a regular file (not a link or device file). + +Further reading +=============== + +#. Robert Love -- Linux Kernel Development, Second Edition -- Chapter + 12. The Virtual Filesystem +#. Understanding the Linux Kernel, 3rd edition - Chapter 12. The Virtual + Filesystem +#. `Linux Virtual File System (presentation)`_ +#. `Understanding Unix/Linux Filesystem`_ +#. `Creating Linux virtual filesystems`_ +#. `The Linux Documentation Project - VFS`_ +#. `The "Virtual File System" in Linux`_ +#. `A Linux Filesystem Tutorial`_ +#. `The Linux Virtual File System`_ +#. `Documentation/filesystems/vfs.txt`_ +#. `File systems sources`_ + +.. _Linux Virtual File System (presentation): http://www.coda.cs.cmu.edu/doc/talks/linuxvfs/ +.. _Understanding Unix/Linux Filesystem: http://www.cyberciti.biz/tips/understanding-unixlinux-file-system-part-i.html +.. _Creating Linux virtual filesystems: http://lwn.net/Articles/57369/ +.. _The Linux Documentation Project - VFS: http://www.tldp.org/LDP/tlk/fs/filesystem.html +.. _The "Virtual File System" in Linux: http://www.linux.it/~rubini/docs/vfs/vfs.html +.. _A Linux Filesystem Tutorial: http://inglorion.net/documents/tutorials/tutorfs/ +.. _The Linux Virtual File System: http://www.win.tue.nl/~aeb/linux/lk/lk-8.html +.. _Documentation/filesystems/vfs.txt: http://lxr.free-electrons.com/source/Documentation/filesystems/vfs.txt +.. _File systems sources: http://lxr.free-electrons.com/source/fs/ + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: filesystems + +.. + _[SURVEY-LABEL] + +myfs +---- + +To begin, we plan to get familiar with the interface exposed by the Linux kernel and the Virtual File System (VFS) component. 
That is why, for the beginning, we will work with a simple, virtual file system (i.e. without physical disk support). The file system is called ``myfs``. + +For this we will access the ``myfs/`` subdirectory in the laboratory skeleton. We will implement the superblock operations within this lab, and the next lab will continue with the inode operations. + +1. Register and unregister the myfs file system +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The first step in working with the file system is to register and unregister it. We want to do this for the file system described in ``myfs.c``. Check the file contents and follow the directions marked with ``TODO 1``. + +The steps you need to take are described in the section :ref:`RegisterUnregisterSection`. Use the ``"myfs"`` string for the file system name. + +.. note:: + Within the file system structure, use the ``myfs_mount`` function present in the code skeleton to fill the superblock (done when mounting). In ``myfs_mount`` call the function specific to a file system without disk support. As an argument for the specific mount function, use the function of type ``fill_super`` defined in the code skeleton. You can review the :ref:`FunctionsMountKillSBSection` section. + + To destroy the superblock (done at unmounting) use ``kill_litter_super``, also a function specific to a file system without disk support. The function is already implemented, you need to fill it in the :c:type:`struct file_system_type` structure. + + +After completing the sections marked with ``TODO 1`` , compile the module, copy it to the QEMU virtual machine, and start the virtual machine. Load the kernel module and then check the presence of the ``myfs`` file system within the ``/proc/filesystems`` file. + +At the moment, the file system is only registered, it does not expose operations to use it. If we try to mount it, the operation will fail. To try mounting, we create mount point ``/mnt/myfs/``. + +.. 
code-block:: console + + # mkdir -p /mnt/myfs + +and then we use the ``mount`` command: + +.. code-block:: console + + # mount -t myfs none /mnt/myfs + +The error message we get shows that we have not implemented the operations that work on the superblock. We will have to implement the operations on the superblock and initialize the root inode. We will do this further. + +.. note:: + + The ``none`` argument sent to the ``mount`` command indicates that we do not have a device from which to mount, the file system being a virtual one. Similarly, this is how the ``procfs`` or ``sysfs`` filesystems are mounted on Linux systems. + + +2. Completing myfs superblock +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To be able to mount the file system, we need to fill its superblock's fields, that is, a generic VFS structure of type :c:type:`struct super_block`. +We will fill out the structure within the :c:func:`myfs_fill_super` function; the superblock is represented by the variable ``sb`` passed as an argument to the function. +Follow the hints marked with ``TODO 2``. + +.. note:: + + To fill the ``myfs_fill_super`` function, you can start from the example in the section :ref:`FillSuperSection`. + + For the superblock structure fields, use the macros defined within the code skeleton wherever possible. + + +The ``s_op`` field in the superblock structure must be initialized to the superblock operations structures (type :c:type:`struct super_operations`). You need to define such a structure. + +For information on defining the :c:type:`struct super_operations` structure and filling the superblock, see the section :ref:`SuperblockSection`. + +.. note:: + + Initialize the ``drop_inode`` and ``statfs`` fields of :c:type:`struct super_operations` structure. + + +Although the superblock will be properly initialized at this time, the mount operation will continue to fail. 
+In order for the operation to be successfully completed, the root inode will have to be initialized, which we will do in the next exercise. + + +3. Initialize myfs root inode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The root inode is the inode of the file system root directory (i.e. ``/``). +Initialization is done when the file system is mounted. +The ``myfs_fill_super`` function, called at mount, is the one that calls the ``myfs_get_inode`` function that creates and initializes an inode. +Typically, this function is used to create and initialize all inodes; in this exercise, however, we will only create the root inode. + +The :c:type:`inode` is allocated inside the ``myfs_get_inode`` function (local variable ``inode``, allocated using the :c:func:`new_inode` function call). + +To successfully complete mounting the file system, you will need to fill the ``myfs_get_inode`` function. Follow directions marked with ``TODO 3``. A starting point is the :c:func:`ramfs_get_inode` function (in ``fs/ramfs/inode.c``). + +.. note:: + + To initialize ``uid``, ``gid`` and ``mode``, you can use the :c:func:`inode_init_owner` function as it is used in :c:func:`ramfs_get_inode`. + When you call :c:func:`inode_init_owner`, use ``NULL`` as the second parameter because there is no parent directory for the created inode. + + Initialize the ``i_atime``, ``i_ctime``, and ``i_mtime`` of the VFS inode to the value returned by the :c:func:`current_time` function. + + You will need to initialize the operations for the inode of type directory. To do this, follow the steps: + + #. Check if this is a directory type inode using the ``S_ISDIR`` macro. + #. For the ``i_op`` and ``i_fop`` fields, use kernel structures that are already implemented: + + * for ``i_op``: :c:type:`simple_dir_inode_operations`. + * for ``i_fop``: :c:type:`simple_dir_operations` + + #. Increase the number of links for the directory using the :c:func:`inc_nlink` function. + +4.
Test myfs mount and unmount +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now we can mount the filesystem. +Follow the steps above to compile the kernel module, copy to the virtual machine, and start the virtual machine, then insert the kernel module, create the mount point ``/mnt/myfs/``, and mount the file system. +We verify that the file system was mounted by inspecting the ``/proc/mounts`` file. + +What inode number does the ``/mnt/myfs`` directory have? Why? + +.. note:: + + To display the inode number of a directory, use the command: + + .. code-block:: console + + ls -di /path/to/directory + + where ``/path/to/directory/`` is the path to the directory whose inode number we want to display. + +We check myfs file system statistics using the following command: + +.. code-block:: console + + stat -f /mnt/myfs + +We want to see what the mount point ``/mnt/myfs`` contains and if we can create files. +For this we run the commands: + +.. code-block:: console + + # ls -la /mnt/myfs + # touch /mnt/myfs/a.txt + +We can see that we can not create the ``a.txt`` file on the file system. +This is because we have not implemented the operations to work with inodes in the :c:type:`struct super_operations` structure. +We will implement these operations within the next lab. + +Unmount the file system using the command + +.. code-block:: console + + umount /mnt/myfs + +Unload the kernel module corresponding to the file system as well. + +.. note:: + + To test the entire functionality, you can use the ``test-myfs.sh`` script: + + .. code-block:: console + + ./test-myfs.sh + + The script is copied to the virtual machine using ``make copy`` only if it is executable: + + .. code-block:: console + + student@workstation:~/linux/tools/labs$ chmod +x skels/filesystems/myfs/test-myfs.sh + + +.. note:: + + The statistics displayed for the file system are minimal because the information is provided by the simple_statfs function. 
+ +minfs +----- + +Next, we will implement the basics of a very simple file system, called ``minfs``, with disk support. +We will use a disk in the virtual machine that we will format and mount with the ``minfs`` filesystem. + +For this we will access the ``minfs/kernel`` directory from the laboratory skeleton and work with the code in ``minfs.c``. +Just like ``myfs`` we will not implement the operations for working with inodes. We will limit ourselves to working with the superblock and, therefore, mounting. +The rest of the operations will be implemented in the next lab. + +Follow the diagram below to clarify the role of structures within the ``minfs`` file system. + +.. image:: ../res/minfs.png + +1. Registering and unregistering the minfs file system +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + Before solving the exercise, we need to add a disk to the virtual machine. To do this, generate a file that we will use as the disk image using the following command: + + .. code-block:: console + + dd if=/dev/zero of=mydisk.img bs=1M count=100 + + and add the ``-drive file=mydisk.img,if=virtio,format=raw`` argument to the ``qemu`` command in ``qemu/Makefile`` (in the ``QEMU_OPTS`` variable). + The new argument for the ``qemu`` command must be added after the one for the existing disk (``YOCTO_IMAGE``). + +To register and unregister the file system, you will need to fill in the ``minfs_fs_type`` structure and the ``minfs_mount`` function in ``minfs.c``. Follow the directions marked with ``TODO 1``. + +.. note:: + + In the file system structure, for mount, use the ``minfs_mount`` function from the code skeleton. + In this function, call the function to mount a file system with disk support (see the :ref:`FunctionsMountKillSBSection` section. Use :c:func:`mount_bdev`). + Choose the most suitable function for destroying the superblock (done at unmount); keep in mind that it is a file system with disk support. Use the :c:func:`kill_block_super` function.
+ + Initialize the ``fs_flags`` field of the :c:type:`minfs_fs_type` structure with the appropriate value for a file system with disk support. See the section :ref:`RegisterUnregisterSection`. + + The function for filling the superblock is ``minfs_fill_super``. + +After completing the sections marked with ``TODO 1``, compile the module, copy it into the QEMU virtual machine, and start the virtual machine. +Load the kernel module and then check the presence of the ``minfs`` file system within the ``/proc/filesystems`` file. + +To test the mounting of the ``minfs`` file system we will need to format the disk with its structure. Formatting requires the ``mkfs.minfs`` formatting tool from the ``minfs/user`` directory. The utility is automatically compiled when running ``make build`` and copied to the virtual machine at ``make copy``. + +After compiling, copying, and starting the virtual machine, format the ``/dev/vdd`` disk using the formatting utility: + +.. code-block:: console + + # ./mkfs.minfs /dev/vdd + +Load the kernel module: + +.. code-block:: console + + # insmod minfs.ko + +Create mount point ``/mnt/minfs/``: + +.. code-block:: console + + # mkdir -p /mnt/minfs/ + +and mount the filesystem + +.. code-block:: console + + # mount -t minfs /dev/vdd /mnt/minfs/ + +The operation fails because the root inode is not initialized. + +2. Completing minfs superblock +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To be able to mount the file system, you will need to fill the superblock (i.e. a structure with type :c:type:`struct super_block`) within the ``minfs_fill_super`` function; it is the ``s`` argument of the function. +The structure of operations on the superblock is already defined: ``minfs_ops``. +Follow the directions marked with ``TODO 2``. You can also follow the implementation of the :c:func:`minix_fill_super` function (in ``fs/minix/inode.c``). + +.. note:: + + Some structures are found in the header file ``minfs.h``.
+ + For information on working with buffers, go to the :ref:`BufferCacheSection` section. + + Read the first block on the disk (block with index 0). + To read the block, use the :c:func:`sb_bread` function. + Cast the read data (the ``b_data`` field in the :c:type:`struct buffer_head` structure) to the structure storing the ``minfs`` superblock information on the disk: :c:type:`struct minfs_super_block`, defined in the source code file. + + Structure :c:type:`struct minfs_super_block` holds file system-specific information that is not found in the :c:type:`struct super_block` generic structure (in this case only the version). + This additional information (found in :c:type:`struct minfs_super_block` (on disk) but not in :c:type:`struct super_block` (VFS)) will be stored in the :c:type:`struct minfs_sb_info` structure. + +To check the functionality, we need a function for reading the root inode. +For the time being, use the ``myfs_get_inode`` function from the ``myfs`` file system exercises. +Copy the function into the source code and call it the same as you did for myfs. +The third argument when calling the ``myfs_get_inode`` function is the inode creation permissions, similar to the virtual file system exercise (myfs). + +Validate the implementation by executing the commands from the previous exercise. + +3. Creating and destroying minfs inodes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For mounting, we need to initialize the root inode, and to get the root inode, we need to implement the functions to work with inodes. +That is, you need to implement the ``minfs_alloc_inode`` and ``minfs_destroy_inode`` functions. +Follow the directions marked with ``TODO 3``. You can use the :c:func:`minix_alloc_inode` and :c:func:`minix_destroy_inode` functions as a model. + +For the implementation, look at the macros and structures in the ``minfs.h`` header file. + +..
note:: + + For memory allocation/deallocation in ``minfs_alloc_inode`` and ``minfs_destroy_inode``, we recommend using :c:func:`kzalloc` and :c:func:`kfree`. + + In ``minfs_alloc_inode`` allocate structures with type :c:type:`struct minfs_inode_info`, but only return structures with type :c:type:`struct inode`, i.e. return those given by the ``vfs_inode`` field. + + In the ``minfs_alloc_inode`` function, call :c:func:`inode_init_once` to initialize the inode. + + In the ``destroy_inode`` function, you can access the structure with type :c:type:`struct minfs_inode_info` using the ``container_of`` macro. + +.. note:: + + In this exercise, you have implemented the ``minfs_alloc_inode`` and ``minfs_destroy_inode`` functions, but they are not yet called. The correctness of the implementation will be checked at the end of the next exercise. + +4. Initialize minfs root inode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Initializing the root inode is required in order to mount the file system. +For this, you will need to complete the ``minfs_ops`` structure with the ``minfs_alloc_inode`` and ``minfs_destroy_inode`` functions and fill the ``minfs_iget`` function. + +The ``minfs_iget`` function is the function called to allocate a VFS inode (i.e. :c:type:`struct inode`) and fill it with minfs inode-specific information from the disk (i.e. ``struct minfs_inode``). + +Follow the directions marked with ``TODO 4``. +Fill out the ``alloc_inode`` and ``destroy_inode`` fields of :c:type:`struct super_operations` structure with the functions implemented in the previous step. + +The information about the root inode is found in the second block on the disk (the inode with index 1). +Make ``minfs_iget`` read the root minfs inode from the disk (:c:type:`struct minfs_inode`) and fill in the VFS inode (:c:type:`struct inode`). + +In the ``minfs_fill_super`` function, replace the ``myfs_get_inode`` call with the ``minfs_iget`` function call. + +.. 
note:: + To implement the ``minfs_iget`` function, follow the implementation of :c:func:`V1_minix_iget` (in ``fs/minix/inode.c``). + To read a block, use the :c:func:`sb_bread` function. + Cast the read data (the ``b_data`` field of the :c:type:`struct buffer_head` structure) to the minfs inode from the disk (:c:type:`struct minfs_inode`). + + The ``i_uid``, ``i_gid``, ``i_mode`` and ``i_size`` fields must be filled in the VFS inode with the values in the minfs inode structure read from disk. + To initialize the ``i_uid`` and ``i_gid`` fields, use the functions :c:func:`i_uid_write` and :c:func:`i_gid_write`. + + Initialize the ``i_atime``, ``i_ctime``, and ``i_mtime`` fields of the VFS inode to the value returned by the :c:func:`current_time` function. + + You will need to initialize the operations for the inode with type directory. To do this, follow the steps: + + #. Check if this is a directory type inode using the ``S_ISDIR`` macro. + #. For the ``i_op`` and ``i_fop`` fields, use kernel structures already implemented: + + * for ``i_op``: :c:type:`simple_dir_inode_operations`. + * for ``i_fop``: :c:type:`simple_dir_operations` + + #. Increment the number of links for the directory using the :c:func:`inc_nlink` function. + +5. Testing of minfs mount and unmount +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now we can mount the filesystem. +Follow the steps above to compile the kernel module, copy to the virtual machine, start the virtual machine, and then insert the kernel module, create mount point ``/mnt/minfs/`` and mount the file system. +We verify that the file system was mounted by inspecting the ``/proc/mounts`` file. + +We check that everything is fine by listing the mount point contents ``/mnt/minfs/``: + +.. code-block:: console + + # ls /mnt/minfs/ + +After mount and verification, unmount the file system and unload the module from the kernel. + +.. note:: + Alternatively, to test the entire functionality, you can use the ``test-minfs.sh`` script: + + ..
code-block:: console + + # ./test-minfs.sh + + The script is copied to the virtual machine when running the ``make copy`` command only if it is executable. + + .. code-block:: console + + student@workstation:~/linux/tools/labs$ chmod +x skels/filesystems/minfs/user/test-minfs.sh + diff --git a/Documentation/teaching/labs/filesystems_part2.rst b/Documentation/teaching/labs/filesystems_part2.rst new file mode 100644 index 00000000000000..9217cdbe68bdd7 --- /dev/null +++ b/Documentation/teaching/labs/filesystems_part2.rst @@ -0,0 +1,1076 @@ +============================ +File system drivers (Part 2) +============================ + +Lab objectives +============== + + * Improving the knowledge about inode, file and dentry. + * Acquiring knowledge about adding support for working with regular files and directories in VFS (*Virtual File System*). + * Acquiring knowledge about the internal implementation of a file system. + +Inode +===== + +The inode is an essential component of a UNIX file system and, at the same time, an important component of VFS. An inode is metadata (it holds information about information). +An inode uniquely identifies a file on disk and holds information about it (uid, gid, access rights, access times, pointers to data blocks, etc.). +An important aspect is that an inode does not have information about the file name (it is retained by the associated :c:type:`struct dentry` structure). + +The inode refers to a file on the disk. To refer to an open file (associated with a file descriptor within a process), the :c:type:`struct file` structure is used. +An inode can have any number of (zero or more) ``file`` structures associated (multiple processes can open the same file, or a process can open the same file several times). + +The inode exists both as a VFS entity (in memory) and as a disk entity (for UNIX, HFS, NTFS, etc.). +The inode in VFS is represented by the structure :c:type:`struct inode`. 
+Like the other structures in VFS, :c:type:`struct inode` is a generic structure that covers the options for all supported file types, even those that do not have an associated disk entity (such as FAT). + +The inode structure +------------------- + +The inode structure is the same for all file systems. In general, file systems also have private information. These are referenced through the ``i_private`` field of the structure. +Conventionally, the structure that keeps that particular information is called ``<fsname>_inode_info``, where ``fsname`` represents the file system name. For example, minix and ext4 filesystems store particular information in structures :c:type:`struct minix_inode_info`, or :c:type:`struct ext4_inode_info`. + +Some of the important fields of :c:type:`struct inode` are: + + * ``i_sb`` : The superblock structure of the file system the inode belongs to. + * ``i_rdev``: the device on which this file system is mounted + * ``i_ino`` : the number of the inode (uniquely identifies the inode within the file system) + * ``i_blkbits``: number of bits used for the block size == log\ :sub:`2`\ (block size) + * ``i_mode``, ``i_uid``, ``i_gid``: access rights, uid, gid + + * ``i_size``: file/directory/etc. size in bytes + * ``i_mtime``, ``i_atime``, ``i_ctime``: modification, access, and (inode) change time + * ``i_nlink``: the number of name entries (dentries) that use this inode; for file systems without links (either hard or symbolic) this is always set to 1 + * ``i_blocks``: the number of blocks used by the file (all blocks, not just data); this is only used by the quota subsystem + * ``i_op``, ``i_fop``: pointers to operations structures: :c:type:`struct inode_operations` and :c:type:`struct file_operations`; ``i_mapping->a_ops`` contains a pointer to :c:type:`struct address_space_operations`. + * ``i_count``: the inode counter indicating how many kernel components use it. 
+ +Some functions that can be used to work with inodes: + + * :c:func:`new_inode`: creates a new inode, sets the ``i_nlink`` field to 1 and initializes ``i_blkbits``, ``i_sb`` and ``i_dev``; + * :c:func:`insert_inode_hash`: adds the inode to the hash table of inodes; an interesting effect of this call is that the inode will be written to the disk if it is marked as dirty; + + .. warning:: + + An inode created with :c:func:`new_inode` is not in the hash table, and unless you have serious reasons not to, you must enter it in the hash table; + + * :c:func:`mark_inode_dirty`: marks the inode as dirty; at a later moment, it will be written on the disk; + * :c:func:`iget_locked`: loads the inode with the given number from the disk, if it is not already loaded; + * :c:func:`unlock_new_inode`: used in conjunction with :c:func:`iget_locked`, releases the lock on the inode; + * :c:func:`iput`: tells the kernel that the work on the inode is finished; if no one else uses it, it will be destroyed (after being written on the disk if it is marked as dirty); + * :c:func:`make_bad_inode`: tells the kernel that the inode cannot be used; it is generally used from the function that reads the inode when the inode could not be read from the disk, being invalid. + +Inode operations +---------------- + +Getting an inode +^^^^^^^^^^^^^^^^ + +One of the main inode operations is obtaining an inode (the :c:type:`struct inode` in VFS). +Until version ``2.6.24`` of the Linux kernel, the developer defined a ``read_inode`` function. +Starting with version ``2.6.25``, the developer must define a ``<fsname>_iget`` function, where ``<fsname>`` is the name of the file system. +This function is responsible for finding the VFS inode if it exists or creating a new one and filling it with the information from the disk. + +Generally, this function will call :c:func:`iget_locked` to get the inode structure from VFS. 
If the inode is newly created then it will need to read the inode from the disk (using :c:func:`sb_bread`) and fill in the useful information. + +An example of such a function is :c:func:`minix_iget`: + +.. code-block:: c + + static struct inode *V1_minix_iget(struct inode *inode) + { + struct buffer_head * bh; + struct minix_inode * raw_inode; + struct minix_inode_info *minix_inode = minix_i(inode); + int i; + + raw_inode = minix_V1_raw_inode(inode->i_sb, inode->i_ino, &bh); + if (!raw_inode) { + iget_failed(inode); + return ERR_PTR(-EIO); + ... + } + + struct inode *minix_iget(struct super_block *sb, unsigned long ino) + { + struct inode *inode; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + if (INODE_VERSION(inode) == MINIX_V1) + return V1_minix_iget(inode); + ... + } + +The minix_iget function gets the VFS inode using :c:func:`iget_locked`. +If the inode is already existing (not new == the ``I_NEW`` flag is not set) the function returns. +Otherwise, the function calls the :c:func:`V1_minix_iget` function that will read the inode from the disk using :c:func:`minix_V1_raw_inode` and then complete the VFS inode with the read information. + +Superoperations +^^^^^^^^^^^^^^^ + +Many of the superoperations (components of the :c:type:`struct super_operations` structure used by the superblock) are used when working with inodes. These operations are described next: + + * ``alloc_inode``: allocates an inode. + Usually, this function allocates a :c:type:`struct <fsname>_inode_info` structure and performs basic VFS inode initialization (using :c:func:`inode_init_once`); + minix uses for allocation the :c:func:`kmem_cache_alloc` function that interacts with the SLAB subsystem. + For each allocation, the cache construction is called, which in the case of minix is the :c:func:`init_once` function. 
+ Alternatively, :c:func:`kmalloc` can be used, in which case the :c:func:`inode_init_once` function should be called. + The :c:func:`alloc_inode` function will be called by the :c:func:`new_inode` and :c:func:`iget_locked` functions. + * ``write_inode`` : saves/updates the inode received as a parameter on disk; to update the inode, though inefficient, for beginners it is recommended to use the following sequence of operations: + + * load the inode from the disk using the :c:func:`sb_bread` function; + * modify the buffer according to the saved inode; + * mark the buffer as dirty using :c:func:`mark_buffer_dirty`; the kernel will then handle its writing on the disk; + * an example is the :c:func:`minix_write_inode` function in the ``minix`` file system + + * ``evict_inode``: removes any information about the inode with the number received in the ``i_ino`` field from the disk and memory (both the inode on the disk and the associated data blocks). This involves performing the following operations: + + * delete the inode from the disk; + * updates disk bitmaps (if any); + * delete the inode from the page cache by calling :c:func:`truncate_inode_pages`; + * delete the inode from memory by calling :c:func:`clear_inode` ; + * an example is the :c:func:`minix_evict_inode` function from the minix file system. + + * ``destroy_inode`` releases the memory occupied by inode + +inode_operations +^^^^^^^^^^^^^^^^ + +The inode operations are described by the :c:type:`struct inode_operations` structure. + +Inodes are of several types: file, directory, special file (pipe, fifo), block device, character device, link etc. +For this reason, the operations that an inode needs to implement are different for each type of inode. +Below are detailed operations for a :ref:`file type inode <FileInodes>` and a :ref:`directory inode <DirectoryInodes>`. + +The operations of an inode are initialized and accessed using the ``i_op`` field of the structure :c:type:`struct inode`. 
+ +The file structure +================== + +The ``file`` structure corresponds to a file open by a process and exists only in memory, being associated with an inode. +It is the closest VFS entity to user-space; the structure fields contain familiar information of a user-space file (access mode, file position, etc.) and the operations with it are performed by known system calls (``read``, ``write`` , etc.). + +The file operations are described by the :c:type:`struct file_operations` structure. + +The file operations for a file system are initialized using the ``i_fop`` field of the :c:type:`struct inode` structure. +When opening a file, the VFS initializes the ``f_op`` field of the :c:type:`struct file` structure with address of ``inode->i_fop``, such that subsequent system calls use the value stored in the ``file->f_op``. + +.. _FileInodes: + +Regular files inodes +==================== + +To work with the inode, the ``i_op`` and ``i_fop`` fields of the inode structure must be filled in. +The type of the inode determines the operations that it needs to implement. + +.. _FileOperations: + +Regular files inode operations +------------------------------ + +In the ``minix`` file system, the ``minix_file_inode_operations`` structure is defined for the operations on an inode and for the file operations the ``minix_file_operations`` structure is defined: + +.. code-block:: c + + const struct file_operations minix_file_operations = { + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, + //... + .write_iter = generic_file_write_iter, + //... + .mmap = generic_file_mmap, + //... + }; + + const struct inode_operations minix_file_inode_operations = { + .setattr = minix_setattr, + .getattr = minix_getattr, + }; + + //... + if (S_ISREG(inode->i_mode)) { + inode->i_op = &minix_file_inode_operations; + inode->i_fop = &minix_file_operations; + } + //... 
+ + + +The functions :c:func:`generic_file_llseek` , :c:func:`generic_file_mmap` , :c:func:`generic_file_read_iter` and :c:func:`generic_file_write_iter` are implemented in the kernel. + +For simple file systems, only the truncation operation (``truncate`` system call) must be implemented. +Although initially there was a dedicated operation, starting with 3.14 the operation was embedded in ``setattr``: if the passed size is different from the current size of the inode, then a truncate operation must be performed. +An example of implementing this verification is in the :c:func:`minix_setattr` function: + +.. code-block:: c + + static int minix_setattr(struct dentry *dentry, struct iattr *attr) + { + struct inode *inode = d_inode(dentry); + int error; + + error = setattr_prepare(dentry, attr); + if (error) + return error; + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = inode_newsize_ok(inode, attr->ia_size); + if (error) + return error; + + truncate_setsize(inode, attr->ia_size); + minix_truncate(inode); + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; + } + +The truncate operation involves: + + * freeing blocks of data on the disk that are now extra (if the new size is smaller than the old one) or allocating new blocks (for cases where the new size is larger) + * updating disk bit maps (if used); + * updating the inode; + * filling with zero the space that was left unused from the last block using the :c:func:`block_truncate_page` function. + +An example of the implementation of the truncate operation is the :c:func:`minix_truncate` function in the ``minix`` file system. + +.. _AddressSpaceOperations: + +Address space operations +------------------------ + +There is a close link between the address space of a process and files: the execution of the programs is done almost exclusively by mapping the file into the process address space. 
+Because this approach works very well and is quite general, it can also be used for regular system calls such as ``read`` and ``write``. + +The structure that describes the address space is :c:type:`struct address_space`, and the operations with it are described by the structure :c:type:`struct address_space_operations`. To initialize the address space operations, fill ``inode->i_mapping->a_ops`` of the file type inode. + +An example is the ``minix_aops`` structure in the minix file system: + +.. code-block:: c + + static const struct address_space_operations minix_aops = { + .readpage = minix_readpage, + .writepage = minix_writepage, + .write_begin = minix_write_begin, + .write_end = generic_write_end, + .bmap = minix_bmap + }; + + //... + if (S_ISREG(inode->i_mode)) { + inode->i_mapping->a_ops = &minix_aops; + } + //... + +The :c:func:`generic_write_end` function is already implemented. +Most of the specific functions are very easy to implement, as follows: + +.. code-block:: c + + static int minix_writepage(struct page *page, struct writeback_control *wbc) + { + return block_write_full_page(page, minix_get_block, wbc); + } + + static int minix_readpage(struct file *file, struct page *page) + { + return block_read_full_page(page, minix_get_block); + } + + static void minix_write_failed(struct address_space *mapping, loff_t to) + { + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, inode->i_size); + minix_truncate(inode); + } + } + + static int minix_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) + { + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, + minix_get_block); + if (unlikely(ret)) + minix_write_failed(mapping, pos + len); + + return ret; + } + + static sector_t minix_bmap(struct address_space *mapping, sector_t block) + { + return generic_block_bmap(mapping, block, minix_get_block); + } + 
+All that needs to be done is to implement :c:func:`minix_get_block`, which has to translate a block of a file into a block on the device. +If the flag ``create`` received as a parameter is set, a new block must be allocated. +In case a new block is created, the bit map must be updated accordingly. +To notify the kernel not to read the block from the disk, ``bh`` must be marked with :c:func:`set_buffer_new`. The buffer must be associated with the block through :c:func:`map_bh`. + +Dentry structure +================ + +Directories operations use the :c:type:`struct dentry` structure. +Its main task is to make links between inodes and filenames. +The important fields of this structure are presented below: + +.. code-block:: c + + struct dentry { + //... + struct inode *d_inode; /* associated inode */ + //... + struct dentry *d_parent; /* dentry object of parent */ + struct qstr d_name; /* dentry name */ + //... + + struct dentry_operations *d_op; /* dentry operations table */ + struct super_block *d_sb; /* superblock of file */ + void *d_fsdata; /* filesystem-specific data */ + //... + }; + +Fields meaning: + + * ``d_inode``: the inode referenced by this dentry; + * ``d_parent``: the dentry associated with the parent directory; + * ``d_name``: a :c:type:`struct qstr` structure that contains the fields ``name`` and ``len`` (the name and the length of the name). + * ``d_op``: operations with dentries, represented by the :c:type:`struct dentry_operations` structure. + The kernel implements default operations so there is no need to (re)implement them. Some file systems can do optimizations based on the specific structure of the dentries. + * ``d_fsdata``: field reserved for the file system that implements dentry operations; + +Dentry operations +----------------- + +The most common operations applied to dentries are: + + * ``d_make_root``: allocates the root dentry. 
It is generally used in the function that is called to read the superblock (``fill_super``), which must initialize the root directory. + So the root inode is obtained from the superblock and is used as an argument to this function, to fill the ``s_root`` field from the :c:type:`struct super_block` structure. + * ``d_add``: associates a dentry with an inode; the dentry received as a parameter in the calls discussed above signifies the entry (name, length) that needs to be created. This function will be used when creating/loading a new inode that does not have a dentry associated with it and has not yet been introduced to the hash table of inodes (at ``lookup``); + * ``d_instantiate``: The lighter version of the previous call, in which the dentry was previously added in the hash table. + +.. warning:: + + ``d_instantiate`` must be used to implement create calls (``mkdir``, ``mknod``, ``rename``, ``symlink``) and NOT ``d_add``. + +.. _DirectoryInodes: + +Directory inodes operations +=========================== + +The operations for directory type inodes have a higher complexity level than the ones for files. +The developer must define operations for inodes and operations for files. +In ``minix``, these operations are defined in :c:type:`minix_dir_inode_operations` and :c:type:`minix_dir_operations`: + +.. code-block:: c + + struct inode_operations minix_dir_inode_operations = { + .create = minix_create, + .lookup = minix_lookup, + .link = minix_link, + .unlink = minix_unlink, + .symlink = minix_symlink, + .mkdir = minix_mkdir, + .rmdir = minix_rmdir, + .mknod = minix_mknod, + //... + }; + + struct file_operations minix_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate = minix_readdir, + //... + }; + + //... + if (S_ISDIR(inode->i_mode)) { + inode->i_op = &minix_dir_inode_operations; + inode->i_fop = &minix_dir_operations; + inode->i_mapping->a_ops = &minix_aops; + } + //... 
+ +The only function already implemented is :c:func:`generic_read_dir`. + +The functions that implement the operations on directory inodes are the ones described below. + +Creating an inode +----------------- + +The inode creation function is indicated by the field ``create`` in the ``inode_operations`` structure. +In the minix case, the function is :c:func:`minix_create`. +This function is called by the ``open`` and ``creat`` system calls. Such a function performs the following operations: + + #. Introduces a new entry into the physical structure on the disk; the update of the bit maps on the disk must not be forgotten. + #. Configures access rights to those received as a parameter. + #. Marks the inode as dirty with the :c:func:`mark_inode_dirty` function. + #. Instantiates the directory entry (``dentry``) with the ``d_instantiate`` function. + +Creating a directory +-------------------- + +The directory creation function is indicated by the ``mkdir`` field in the ``inode_operations`` structure. +In the minix case, the function is :c:func:`minix_mkdir`. +This function is called by the ``mkdir`` system call. Such a function performs the following operations: + + #. Calls :c:func:`minix_create`. + #. Allocates a data block for the directory. + #. Creates the ``"."`` and ``".."`` entries. + +Creating a link +--------------- + +The link creation function (hard link) is indicated by the ``link`` field in the ``inode_operations`` structure. +In the minix case, the function is :c:func:`minix_link`. +This function is called by the ``link`` system call. Such a function performs the following operations: + + * Binds the new dentry to the inode. + * Increments the ``i_nlink`` field of the inode. + * Marks the inode as dirty using the :c:func:`mark_inode_dirty` function. + +Creating a symbolic link +------------------------ + +The symbolic link creation function is indicated by the ``symlink`` field in the ``inode_operations`` structure. 
+In the minix case, the function is :c:func:`minix_symlink`. +The operations to be performed are similar to ``minix_link`` with the differences being given by the fact that a symbolic link is created. + +Deleting a link +--------------- + +The link delete function (hard link) is indicated by the ``unlink`` field in the ``inode_operations`` structure. +In the minix case, the function is :c:func:`minix_unlink`. +This function is called by the ``unlink`` system call. Such a function performs the following operations: + + #. Deletes the directory entry given as a parameter from the physical disk structure. + #. Decrements the ``i_nlink`` counter of the inode to which the entry points (otherwise the inode will never be deleted). + +Deleting a directory +-------------------- + +The directory delete function is indicated by the ``rmdir`` field in the ``inode_operations`` structure. +In the minix case, the function is :c:func:`minix_rmdir`. +This function is called by the ``rmdir`` system call. +Such a function performs the following operations: + + #. Performs the operations done by ``minix_unlink``. + #. Ensures that the directory is empty; otherwise, returns ``ENOTEMPTY``. + #. Also deletes the data blocks. + +Searching for an inode in a directory +------------------------------------- + +The function that searches for an entry in a directory and extracts the inode is indicated by the ``lookup`` field in the ``inode_operations`` structure. +In the minix case, the function is ``minix_lookup``. +This function is called indirectly when information about the inode associated with an entry in a directory is needed. +Such a function performs the following operations: + + #. Searches in the directory indicated by ``dir`` the entry having the name ``dentry->d_name.name``. + #. If the entry is found, it will return ``NULL`` and associate the inode with the name using the :c:func:`d_add` function. + #. Otherwise, returns ``ERR_PTR``. 
+ +Iterating through entries in a directory +---------------------------------------- + +The function which iterates through the entries in a directory (lists the directory contents) is indicated by the field ``iterate`` in the ``struct file_operations`` structure. +In the minix case, the function is ``minix_readdir``. +This function is called by the ``readdir`` system call. + +The function returns either all entries in the directory or just a part of them when the buffer allocated for them is not large enough. +A call of this function can return: + + * a number equal to the existing number of entries if there is enough space in the corresponding user space buffer; + * a number smaller than the actual number of entries, as much as there was space in the corresponding user space buffer; + * ``0``, where there are no more entries to read. + +The function will be called consecutively until all available entries are read. The function is called at least twice. + + * It is only called twice if: + + * the first call reads all entries and returns their number; + * the second call returns 0, having no other entries to read. + + * It is called more than twice if the first call does not return the total number of entries. + +The function performs the following operations: + + #. Iterates over the entries (the dentries) from the current directory. + #. For each dentry found, increments ``ctx->pos``. + #. For each valid dentry (an inode other than ``0``, for example), calls the :c:func:`dir_emit` function. + #. If the :c:func:`dir_emit` function returns zero (``false``), it means that the buffer in the user space is full and the function returns. 
+ +The arguments of the ``dir_emit`` function are: + + * ``ctx`` is the directory iteration context, passed as an argument to the ``iterate`` function; + * ``name`` is the name of the entry (a string of characters); + * ``name_len`` is the length of the entry name; + * ``ino`` is the inode number associated with the entry; + * ``type`` identifies the entry type: ``DT_REG`` (file), ``DT_DIR`` (directory), ``DT_UNKNOWN`` etc. ``DT_UNKNOWN`` can be used when the entry type is unknown. + +.. _BitmapOperations: + +Bitmap operations +================= + +When working with the file systems, management information (what block is free or busy, what inode is free or busy) is stored using bitmaps. +For this we often need to use bit operations. Such operations are: + + * searching the first 0 bit: representing a free block or inode + * marking a bit as 1: marking a busy block or inode + +The bitmap operations are found in headers from ``include/asm-generic/bitops``, especially in ``find.h`` and ``atomic.h``. Usual functions, with names indicating their role, are: + + * :c:func:`find_first_zero_bit` + * :c:func:`find_first_bit` + * :c:func:`set_bit` + * :c:func:`clear_bit` + * :c:func:`test_and_set_bit` + * :c:func:`test_and_clear_bit` + +These functions usually receive the address of the bitmap, possibly its size (in bits) and, if necessary, the index of the bit that needs to be activated (set) or deactivated (clear). + +Some usage examples are listed below: + +.. code-block:: c + + unsigned int map; + unsigned char array_map[NUM_BYTES]; + size_t idx; + int changed; + + /* Find first zero bit in 32 bit integer. */ + idx = find_first_zero_bit(&map, 32); + printk (KERN_ALERT "The %zu-th bit is the first zero bit.\n", idx); + + /* Find first one bit in NUM_BYTES bytes array. */ + idx = find_first_bit(array_map, NUM_BYTES * 8); + printk (KERN_ALERT "The %zu-th bit is the first one bit.\n", idx); + + /* + * Clear the idx-th bit in integer. 
+ * It is assumed idx is less than the number of bits in integer. + */ + clear_bit(idx, &map); + + /* + * Test and set the idx-th bit in array. + * It is assumed idx is less than the number of bits in array. + */ + changed = __test_and_set_bit(idx, &sbi->imap); + if (changed) + printk(KERN_ALERT "%zu-th bit changed\n", idx); + +Further reading +=============== + +#. Robert Love -- Linux Kernel Development, Second Edition -- Chapter + 12. The Virtual Filesystem +#. Understanding the Linux Kernel, 3rd edition - Chapter 12. The Virtual + Filesystem +#. `Linux Virtual File System (presentation)`_ +#. `Understanding Unix/Linux Filesystem`_ +#. `Creating Linux virtual filesystems`_ +#. `The Linux Documentation Project - VFS`_ +#. `The "Virtual File System" in Linux`_ +#. `A Linux Filesystem Tutorial`_ +#. `The Linux Virtual File System`_ +#. `Documentation/filesystems/vfs.txt`_ +#. `File systems sources`_ + +.. _Linux Virtual File System (presentation): http://www.coda.cs.cmu.edu/doc/talks/linuxvfs/ +.. _Understanding Unix/Linux Filesystem: http://www.cyberciti.biz/tips/understanding-unixlinux-file-system-part-i.html +.. _Creating Linux virtual filesystems: http://lwn.net/Articles/57369/ +.. _The Linux Documentation Project - VFS: http://www.tldp.org/LDP/tlk/fs/filesystem.html +.. _The "Virtual File System" in Linux: http://www.linux.it/~rubini/docs/vfs/vfs.html +.. _A Linux Filesystem Tutorial: http://inglorion.net/documents/tutorials/tutorfs/ +.. _The Linux Virtual File System: http://www.win.tue.nl/~aeb/linux/lk/lk-8.html +.. _Documentation/filesystems/vfs.txt: http://lxr.free-electrons.com/source/Documentation/filesystems/vfs.txt +.. _File systems sources: http://lxr.free-electrons.com/source/fs/ + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: filesystems + +.. important:: + + In this lab, we will continue the implementation of the file systems started in the previous one. 
+ For this, we will generate the laboratory skeleton using the following command: + + .. code-block:: console + + TODO=5 LABS=filesystems make skels + + After this, we will start the implementation from ``TODO 5``. + +myfs +---- + +For the exercises below, we will use the ``myfs`` file system whose implementation we started with the previous lab. +We stopped after mounting the file system and now we will continue with the operations for regular files and directories. +At the end of these exercises, we will be able to create, modify and delete regular directories and files. + +We will mostly use the ``inode`` and ``dentry`` VFS structures. +The ``inode`` structure defines a file (of any type: regular, directory, link), while the ``dentry`` structure defines a name, which is an entry in a directory. + +For this we will access the ``myfs`` directory in the lab skeleton. +The previously generated skeleton contains the solution for the previous lab; we will start from this. As in the previous lab, we will use the ``ramfs`` file system as a starting point. + +1. Directory operations +^^^^^^^^^^^^^^^^^^^^^^^ + +To begin with, we will implement the operations for working with directories. +The operations of creating a file or deleting a file are also directory operations; these operations result in adding or deleting a directory entry (*dentry*). + +At the end of this exercise we will be able to create and delete entries in the file system. We will not be able to read and write to regular files; we will do so in the next exercise. + +Follow directions marked with ``TODO 5`` which will guide you through the steps you need to take. 
+ +You will need to specify the following directory operations: + + * create a file (``create`` function) + * search (``lookup`` function) + * link (``link`` function) + * create directory (``mkdir`` function) + * deletion (``rmdir`` and ``unlink`` functions) + * create node (``mknod``) + * rename (``rename`` function) + +For this, define the ``myfs_dir_inode_operations`` structure in the code, where marked with ``TODO 5``. +To begin, just define the structure ``myfs_dir_inode_operations``; you will define the structures ``myfs_file_operations``, ``myfs_file_inode_operations`` , and ``myfs_aops`` in the next exercise. + +.. tip:: + + Read the section :ref:`DirectoryInodes` + + As a model, you are following the ``ramfs_dir_inode_operations`` structure. + +Implement the ``mkdir``, ``mknod`` and ``create`` operations inside ``myfs_mkdir``, ``myfs_mknod`` and ``myfs_create``. +These operations will allow you to create directories and files in the file system. + +.. tip:: + + We recommend making the code modular using a ``mknod`` function, which you can also use for the next exercise. + For inode reading and allocation, use ``myfs_get_inode``, which is already implemented. + + As a model, follow the next functions implemented in the ``ramfs`` file system: + + * :c:func:`ramfs_mknod` + * :c:func:`ramfs_mkdir` + * :c:func:`ramfs_create` + +For the other functions, use generic calls (``simple_*``) already defined in VFS. + +In the ``myfs_get_inode`` function, initialize the operations fields of the directory inodes: + + * ``i_op`` must be initialized to the address of the structure ``myfs_dir_inode_operations``; + * ``i_fop`` must be initialized to the address of the structure ``simple_dir_operations``, defined in VFS. + +.. note:: + + ``i_op`` is a pointer to a structure of type :c:type:`struct inode_operations` containing operations that have to do with the inode, which are, for a directory, creating a new entry, listing entries, deleting entries, etc. 
+ + ``i_fop`` is a pointer to a structure of type :c:type:`struct file_operations` containing operations that have to do with the ``file`` structure associated with the inode, such as ``read``, ``write``, and ``lseek``. + +Testing +""""""" + +Once the module is done, we can test the creation of files and directories. +To do this, we compile the kernel module (using ``make build``) and copy the resulting file (``myfs.ko``) and the test scripts (``test-myfs-{1,2}.sh``) in the virtual machine directory (using ``make copy``). + +.. note:: + + The test scripts are copied to the virtual machine using ``make copy`` only if they are executable: + + .. code-block:: console + + student@workstation:~/linux/tools/labs$ chmod +x skels/filesystems/myfs/test-myfs-*.sh + +After starting the virtual machine, insert the module, create the mount point and mount the file system: + +.. code-block:: console + + # insmod myfs.ko + # mkdir -p /mnt/myfs + # mount -t myfs none /mnt/myfs + +Now we can create file hierarchies and subdirectories in the mounted directory (``/mnt/myfs``). +We use commands like the ones below: + +.. code-block:: console + + # touch /mnt/myfs/peanuts.txt + # mkdir -p /mnt/myfs/mountain/forest + # touch /mnt/myfs/mountain/forest/tree.txt + # rm /mnt/myfs/mountain/forest/tree.txt + # rmdir /mnt/myfs/mountain/forest + +At this time we can not read or write files. When running commands such as the following ones we will get errors. + +.. code-block:: console + + # echo "chocolate" > /mnt/myfs/peanuts.txt + # cat /mnt/myfs/peanuts.txt + +This happens because we have not implemented the operations for working with files; we will do so further. + +To unload the kernel module, use the command + +.. code-block:: console + + umount /mnt/myfs + rmmod myfs + +To test the functionality provided by the kernel module, we can use the dedicated script ``test-myfs-1.sh``. +If the implementation is correct, no error messages will be displayed. + +2. 
File operations +^^^^^^^^^^^^^^^^^^ + +We want to implement the operations for working with files, which are used for accessing a file's content: read, write, truncate, etc. +For this you will specify the operations described in the structures :c:type:`struct inode_operations`, :c:type:`struct file_operations` and :c:type:`struct address_space_operations`. + +Follow the locations marked with ``TODO 6`` which will guide you through the steps you need to take. + +Start by defining ``myfs_file_inode_operations`` and ``myfs_file_operations``. + +.. tip:: + + Read the section :ref:`FileOperations`. + + Use the generic functions provided by VFS. + + An example of implementation is the ``ramfs`` file system. + Follow the implementation of ``ramfs_file_inode_operations`` and ``ramfs_file_operations``. + +Inside the function ``myfs_get_inode``, initialize the operations fields for the regular file inodes: + + * ``i_op`` must be initialized to ``myfs_file_inode_operations``; + * ``i_fop`` must be initialized to ``myfs_file_operations``. + +Continue with defining the structure ``myfs_aops``. + +.. tip:: + + Read the section :ref:`AddressSpaceOperations`. + + Use the generic functions provided by VFS. + + An implementation example is the ``ramfs`` file system: the ``ramfs_aops`` structure. + + You do not need to define the function of type ``set_page_dirty``. + +Initialize the ``i_mapping->a_ops`` field of the inode structure to ``myfs_aops``. + +Testing +""""""" + +For testing, we use the steps described in the previous exercise. +In addition to those steps, we will now be able to read, write and modify a file using commands like the ones below: + +.. code-block:: console + + # echo "chocolate" > /mnt/myfs/peanuts.txt + # cat /mnt/myfs/peanuts.txt + +To test the functionality provided by the module, we can use the dedicated script: + +..
code-block:: console + + # ./test-myfs-2.sh + +If the implementation is correct, no error messages will be displayed when running the above script. + +minfs +----- + +For the exercises below, we will use the minfs file system whose development started in the previous lab. +This is a file system with disk support. +We stopped after mounting the file system and now we will continue with the operations on regular files and directories. +At the end of these exercises we will be able to create and delete entries in the file system. + +We will mainly use the :c:type:`inode` and :c:type:`dentry` VFS structures. +The inode structure defines a file (of any type: regular, directory, link), while the dentry structure defines a name, which is a directory entry. + +For this we will access the ``minfs/kernel`` directory from the laboratory skeleton. +The generated skeleton contains the solution from the previous lab; we will start from this. +As in the previous lab, we will use the ``minix`` file system as a starting point. + +We will use the formatting tool ``mkfs.minfs`` in the ``minfs/user`` directory which is automatically compiled when running ``make build`` and copied to the virtual machine at ``make copy``. + +The formatting tool prepares a virtual machine disk using a command like + +.. code-block:: console + + # ./mkfs.minfs /dev/vdb + +After formatting, the disk has a structure like the one in the diagram below: + +.. image:: ../res/minfs_arch.png + +As shown in the diagram, ``minfs`` is a minimalist file system. +``minfs`` contains a maximum of 32 inodes, each inode having a single data block (the file size is limited to block size). +The super block contains a 32-bit map (``imap``), each bit indicating the use of an inode. + +.. note:: + + Before you start working, go through the ``minfs/kernel/minfs.h`` header file. + This file contains the structures and macros that will be used in these exercises. 
+ These structures and macros define the file system as described in the diagram above. + +1. Iterate operation +^^^^^^^^^^^^^^^^^^^^ + +At first we want to be able to list the contents of the root directory. +For this we must be able to read the entries in the root directory, which means implementing the ``iterate`` operation. +The ``iterate`` operation is a field within the ``minfs_dir_operations`` structure (of type ``file_operations``) and is implemented by the function ``minfs_readdir``. We need to implement this function. + +Follow directions marked with ``TODO 5`` which will guide you through the steps you need to take. + +.. tip:: + + Read the section :ref:`DirectoryInodes` + + As a starting point, follow the :c:func:`minix_readdir` function. + The function is rather complicated, but it gives you an insight into the steps you have to do. + + Follow, in ``minfs.c`` and ``minfs.h``, the definitions of structures ``struct minfs_inode_info``, ``struct minfs_inode`` and ``struct minfs_dir_entry``. + You will use them in the ``minfs_readdir`` implementation. + +Obtain the inode and the structure ``struct minfs_inode_info`` associated with the directory. +The structure ``struct minfs_inode_info`` is useful to find out the directory's data block. +From this structure you get the ``data_block`` field, representing the data block index on the disk. + +.. tip:: + + To get the structure ``struct minfs_inode_info`` structure, use :c:func:`list_entry` or :c:func:`container_of`. + +Use :c:func:`sb_bread` to read the directory data block. + +.. tip:: + + The data block of the directory is indicated by the ``data_block`` field of the structure ``struct minfs_inode_info`` corresponding to the directory. + + The data in the block is referenced by the ``b_data`` field of the ``buffer_head`` structure (the usual code will be ``bh->b_data``). 
+ This block (being the data block of a directory) contains an array of at most ``MINFS_NUM_ENTRIES`` entries of type ``struct minfs_dir_entry`` (directory entries specific to ``minfs``). + Use casting to ``struct minfs_dir_entry *`` to work with the data in the block. + +Iterate over all the entries in the data block and fill the user space buffer inside the ``for`` loop. + +.. tip:: + + For each index, get the corresponding entry of the ``struct minfs_dir_entry`` by using pointer arithmetic on the ``bh->b_data`` field. + Ignore dentries that have an ``ino`` field equal to 0. Such a dentry is a free slot in the directory's dentry list. + + For each valid entry, there is an existing call :c:func:`dir_emit` with the appropriate parameters. This is the call that sends the dentries to the caller (and then to user space). + + Check the call examples in :c:func:`qnx6_readdir` and :c:func:`minix_readdir`. + +Testing +""""""" + +Once the module is done, we can test the listing of the root directory contents. +To do this, we compile the kernel module (``make build``) and copy the result to the virtual machine together with the test scripts (``minfs/user/test-minfs-{0,1}.sh``) and the formatting utility (``minfs/user/mkfs.minfs``) using ``make copy``, then start the machine. + +.. note:: + + The test scripts are copied to the virtual machine only if they are executable: + + .. code-block:: console + + student@eg106:~/src/linux/tools/labs$ chmod +x skels/filesystems/minfs/user/test-minfs*.sh + +After we start the virtual machine, we format the ``/dev/vdb`` disk, create the mount point and mount the file system: + +.. code-block:: console + + # ./mkfs.minfs /dev/vdb + # mkdir -p /mnt/minfs + # mount -t minfs /dev/vdb /mnt/minfs + +Now we can list the contents of the root directory: + +.. code-block:: console + + # ls -l /mnt/minfs + +We notice that there is already a file (``a.txt``); it is created by the formatting utility.
+ +We also notice that we are not allowed to display information for a file using the ``ls`` command. +This is because we have not implemented the ``lookup`` function. We will implement it in the next exercise. + +To test the functionality provided by the module, we can use the dedicated scripts: + +.. code-block:: console + + # ./test-minfs-0.sh + # ./test-minfs-1.sh + +2. Lookup operation +^^^^^^^^^^^^^^^^^^^ + +To properly list the contents of a directory, we need to implement the search functionality, i.e. the ``lookup`` operation. +The ``lookup`` operation is a field within the ``minfs_dir_inode_operations`` structure (of type ``inode_operations``) and is implemented by the ``minfs_lookup`` function. +This function (``minfs_lookup``) needs to be implemented. +We will actually implement the ``minfs_find_entry`` function called by ``minfs_lookup``. + +Follow directions marked with ``TODO 6`` which will tell you the steps you need to take. + +.. tip:: + + Read the section :ref:`DirectoryInodes` + + As a starting point, read the functions :c:func:`qnx6_find_entry` and :c:func:`minix_find_entry`. + +In the ``minfs_find_entry`` function, iterate over the directory where the dentry is: ``dentry->d_parent->d_inode``. +Iterating means going through the entries in the directory's data block (of type ``struct minfs_dir_entry``) and locating, if it exists, the requested entry. + +.. tip:: + + From the structure of type ``struct minfs_inode_info`` corresponding to the directory, find out the data block index and read it (``sb_bread``). + You will access the block contents using ``bh->b_data``. + The directory data block contains an array of at most ``MINFS_NUM_ENTRIES`` entries of type ``struct minfs_dir_entry``. + Use pointer arithmetic to get entries of type ``struct minfs_dir_entry`` from the data block (``bh->b_data``).
+ + Check the presence of the name (stored in the local variable ``name``) in the directory (if there is an entry in the data block whose name is a string equal to the given name). Use :c:func:`strcmp` to verify. + + Ignore dentries that have an ``ino`` field equal to ``0``. Those dentries are free slots in the directory dentry list. + + Store in the ``final_de`` variable the dentry found. + If you do not find any dentry, then the ``final_de`` variable will have the value ``NULL``, the value with which it was initialized. + +Comment the ``simple_lookup`` call in the ``minfs_lookup`` function to invoke the implementation of ``minfs_readdir``. + +Testing +""""""" + +For testing, we use the steps described in the previous exercise. +The long file listing (``ls -l``) of the contents of a directory (root directory) will display permissions and other file-specific information: + +.. code-block:: console + + # ls -l /mnt/minfs + +To test the functionality provided by the module, we can use the dedicated scripts: + +.. code-block:: console + + # ./test-minfs-0.sh + # ./test-minfs-1.sh + +If the implementation is correct, no error messages will be displayed when running the scripts above. + +.. note:: + + After mounting the file system using the command + + .. code-block:: console + + # mount -t minfs /dev/vdb /mnt/minfs + + we try to create a file using the command + + .. code-block:: console + + # touch /mnt/minfs/peanuts.txt + + We notice that we get an error because we did not implement the directory operations that allow us to create a file. + We will do this for the next exercise. + +3. Create operation +^^^^^^^^^^^^^^^^^^^ + +In order to allow the creation of a file in a directory, we must implement the ``create`` operation. +The ``create`` operation is a field in the ``minfs_dir_inode_operations`` structure (of type :c:type:`inode_operations`) and is implemented by the ``minfs_create`` function. We need to implement this function. 
+In fact, we will implement the ``minfs_new_inode`` (which creates and initializes an inode) and ``minfs_add_link`` which adds a link (or name or *dentry*) for the created inode. + +Follow directions marked with ``TODO 7`` which will guide you through the steps you need to take. + +.. tip:: + + Read the section :ref:`DirectoryInodes` + + Inspect the code in the ``minfs_create`` and the skeleton of functions ``minfs_new_inode`` and ``minfs_add_link``. + +Implement the function ``minfs_new_inode``. Inside this function you will create (using :c:func:`new_inode`) and initialize an inode. The initialization is done using the data from disk. + +.. tip:: + + Use the :c:func:`minix_new_inode` function as a model. + Find the first free inode in imap (``sbi->imap``). + Use bitwise operations (``find_first_zero_bit`` and ``set_bit``). + Read the :ref:`BitmapOperations` section. + + The buffer for the superblock (``sbi->sbh``) must be marked as dirty . + + You must initialize the usual fields as it is done for the ``myfs`` file system. + Initialize the ``i_mode`` field to ``0`` in the call to ``inode_init_owner``. It will be initialized in the caller later. + +Implement the ``minfs_add_link`` function. The function adds a new dentry (``struct minfs_dir_entry``) to the parent directory data block (``dentry->d_parent->d_inode``). + +.. tip:: + + Use the function ``minix_add_link`` function as a model. + +In ``minfs_add_link`` we want to find the first free place for the dentry. +For this, you will iterate over the directory data block and you will find the first free entry. A free dentry has the ``ino`` field equal to ``0``. + +.. tip:: + + In order to work with the directory, get the inode of type ``struct minfs_inode_info`` corresponding to the parent directory (the **dir** inode). + Do not use the variable ``inode`` to get ``struct minfs_inode_info``; that inode belongs to the file, not to the parent directory inside which you want to add the link/dentry. 
+ To get the ``struct minfs_inode_info`` structure, use :c:func:`container_of`. + + The structure ``struct minfs_inode_info`` is useful for finding the directory data block (the one indicated by the ``dentry->d_parent->d_inode``, which is the ``dir`` variable). + From this structure, get the ``data_block`` field, representing the index of the data block on the disk. + This block contains the entries in the directory. Use :c:func:`sb_bread` to read the block and then ``bh->b_data`` to refer to the data. + The block contains at most ``MINFS_NUM_ENTRIES`` entries of type ``struct minfs_dir_entry``. + + If all entries are occupied, return ``-ENOSPC``. + + Iterate over the entries in the data block using the variable ``de`` and extract the first free entry (for which the ``ino`` field is ``0``). + + When you have found a free place, fill in the corresponding entry: + + * the ``inode->i_ino`` field in ``de->ino`` + * the ``dentry->d_name.name`` field in ``de->name`` + + Then mark the buffer dirty. + + +Testing +""""""" + +For testing, we use the steps described in the previous exercise. +Now we can create files within the file system: + +.. code-block:: console + + # touch /mnt/minfs/peanuts.txt + +To test the functionality provided by the module, we can use the dedicated script: + +.. code-block:: console + + # ./test-minfs-2.sh + +If the implementation is correct, no error messages will be displayed when running the above script. + +.. note:: + + The current implementation of the ``minfs`` file system is not definitive. + To be complete, the implementation needs functions to delete files, create and delete directories, rename entries, and modify the contents of a file.
+ diff --git a/Documentation/teaching/labs/infrastructure.rst b/Documentation/teaching/labs/infrastructure.rst new file mode 100644 index 00000000000000..b3b4873c01e114 --- /dev/null +++ b/Documentation/teaching/labs/infrastructure.rst @@ -0,0 +1,82 @@ +Infrastructure +============== + +In order to facilitate learning each topic has a hands-on exercises +section which will contain in-depth, incremental clues on how to solve +one or multiple tasks. To focus on a particular issue most of the +tasks will be performed on existing skeleton drivers. Each skeleton +driver has clearly marked sections that need to be filled in to +complete the tasks. + +The skeleton drivers are generated from full source examples located +in tools/labs/templates. To solve tasks you start by generating the +skeleton drivers, running the **skels** target in *tools/labs*. To +keep the workspace clean it is recommended to generate the skeletons +for one lab only and clean the workspace before starting work on a new +lab. Labs can be selected by using the **LABS** variable: + +.. code-block:: shell + + tools/labs $ make clean + tools/labs $ LABS=kernel_modules make skels + + tools/labs $ ls skels/kernel_modules/ + 1-2-test-mod 3-error-mod 4-multi-mod 5-oops-mod 6-cmd-mod \ + 7-list-proc 8-kprobes 9-kdb + +You can also use the same variable to generate skeletons for specific +tasks: + +.. code-block:: shell + + tools/labs $ LABS="kernel_modules/6-cmd-mod kernel_modules/8-kprobes" make skels + + tools/labs$ ls skels/kernel_modules + 6-cmd-mod 8-kprobes + + +For each task you may have multiple steps to perform, usually +incremental. These steps are marked in the source code as well as in +the lab exercises with the keyword *TODO*. If we have multiple steps +to perform they will be suffixed with a number, like *TODO1*, *TODO2*, +etc. If no number is used it is assumed to be the one and only +step. If you want to resume a task from a certain step, you can do so using +the **TODO** variable.
The following example will generate the +skeleton with the first *TODO* step resolved: + +.. code-block:: shell + + tools/labs $ TODO=2 LABS="kernel_modules/8-kprobes" make skels + +Once the skeleton drivers are generated you can build them with the +**build** make target: + +.. code-block:: shell + + tools/labs $ make build + echo "# autogenerated, do not edit " > skels/Kbuild + for i in ./kernel_modules/8-kprobes; do echo "obj-m += $i/" >> skels/Kbuild; done + make -C /home/tavi/src/linux M=/home/tavi/src/linux/tools/labs/skels ARCH=x86 modules + make[1]: Entering directory '/home/tavi/src/linux' + CC [M] /home/tavi/src/linux/tools/labs/skels/./kernel_modules/8-kprobes/kprobes.o + Building modules, stage 2. + MODPOST 1 modules + CC /home/tavi/src/linux/tools/labs/skels/./kernel_modules/8-kprobes/kprobes.mod.o + LD [M] /home/tavi/src/linux/tools/labs/skels/./kernel_modules/8-kprobes/kprobes.ko + make[1]: Leaving directory '/home/tavi/src/linux' + + +To copy the drivers to the VM you can either use ssh or update the +VM image directly using the **copy** target: + +.. code-block:: shell + + tools/labs $ make copy + ... + 'skels/kernel_modules/8-kprobes/kprobes.ko' -> '/tmp/tmp.4UMKcISmQM/home/root/skels/kernel_modules/8-kprobes/kprobes.ko' + +.. attention:: The **copy** target will fail if the VM is + running. This is intentional so that we avoid corrupting the + filesystem.
+ + diff --git a/Documentation/teaching/labs/interrupts.rst b/Documentation/teaching/labs/interrupts.rst new file mode 100644 index 00000000000000..f5772846b19cf2 --- /dev/null +++ b/Documentation/teaching/labs/interrupts.rst @@ -0,0 +1,1120 @@ +========================== +I/O access and Interrupts +========================== + +Lab objectives +============== + +* communication with peripheral devices +* implement interrupt handlers +* synchronizing interrupts with process context + +Keywords: IRQ, I/O port, I/O address, base address, UART, request_region, release_region, inb, outb + +Background information +====================== + +A peripheral device is controlled by writing and reading its +registers. Often, a device has multiple registers that can be accessed +at consecutive addresses either in the memory address space or in the +I/O address space. Each device connected to the I/O bus has a set of +I/O addresses, called I/O ports. I/O ports can be mapped to physical +memory addresses so that the processor can communicate with the device +through instructions that work directly with the memory. For +simplicity, we will directly use I/O ports (without mapping to physical +memory addresses) to communicate with physical devices. + +The I/O ports of each device are structured into a set of specialized +registers to provide a uniform programming interface. Thus, most +devices will have the following types of registers: + +* **Control** registers that receive device commands +* **Status** registers, which contain information about the device's + internal status +* **Input** registers from which data is taken from the device +* **Output** registers in which the data is written to transmit it to the + device + +Physical ports are differentiated by the number of bits: they can be +8, 16 or 32-bit ports. + +For example, the parallel port has 8 8-bit I/O ports starting at base +address 0x378. 
The data register is found at base address (0x378), status +register at base + 1 (0x379), and control at base address + 2 +(0x37a). The data register is both an input and an output register. + +Although there are devices that can be fully controlled using I/O +ports or special memory areas, there are situations where this is +insufficient. The main problem that needs to be addressed is that +certain events occur at undefined moments in time and it is +inefficient for the processor (CPU) to interrogate the status of the +device repeatedly (polling). The way to solve this problem is using an +Interrupt ReQuest (IRQ) which is a hardware notification by which the +processor is notified that a particular external event happened. + +For IRQs to be useful device drivers must implement handlers, i.e. a +particular sequence of code that handles the interrupt. Because in +many situations the number of interrupts available is limited, a +device driver must behave in an orderly fashion with interrupts: +interrupts must be requested before being used and released when they +are no longer needed. In addition, in some situations, device drivers +must share an interrupt or synchronize with interrupts. All of these will be +discussed further. + +When we need to access shared resources between an interrupt +routine (A) and code running in process context or in bottom-half +context (B), we must use a special synchronization technique. In (A) +we need to use a spinlock primitive, and in (B) we must disable +interrupts AND use a spinlock primitive. Disabling interrupts is not +enough because the interrupt routine can run on a processor other than +the one running (B). + +Using only a spinlock can lead to a deadlock. The classic example of +deadlock in this case is: + +1. We run a process on the X processor, and we acquire the lock +2. Before releasing the lock, an interrupt is generated on the X processor +3.
The interrupt handling routine will try to acquire the lock and it + will go into an infinite loop + + +Accessing the hardware +====================== + +In Linux, the I/O ports access is implemented on all architectures and +there are several APIs that can be used. + +Request access to I/O ports +--------------------------- + +Before accessing I/O ports we first must request access to them, to +make sure there is only one user. In order to do so, one must use the +:c:func:`request_region` function: + +.. code-block:: c + + #include + + struct resource *request_region(unsigned long first, unsigned long n, + const char *name); + +To release a reserved region one must use the :c:func:`release_region` function: + +.. code-block:: c + + void release_region(unsigned long start, unsigned long n); + + +For example, the serial port COM1 has the base address 0x3F8 and it +has 8 ports and this is a code snippet of how to request access to +these ports: + +.. code-block:: c + + #include + + #define MY_BASEPORT 0x3F8 + #define MY_NR_PORTS 8 + + if (!request_region(MY_BASEPORT, MY_NR_PORTS, "com1")) { + /* handle error */ + return -ENODEV; + } + +To release the ports one would use something like: + +.. code-block:: c + + release_region(MY_BASEPORT, MY_NR_PORTS); + +Most of the time, port requests are done at the driver initialization +or probe time and the port releasing is done at the removal of the +device or module. + +All of the port requests can be seen from userspace via the +:file:`/proc/ioports` file: + +.. code-block:: shell + + $ cat /proc/ioports + 0000-001f : dma1 + 0020-0021 : pic1 + 0040-005f : timer + 0060-006f : keyboard + 0070-0077 : rtc + 0080-008f : dma page reg + 00a0-00a1 : pic2 + 00c0-00df : dma2 + 00f0-00ff : fpu + 0170-0177 : ide1 + 01f0-01f7 : ide0 + 0376-0376 : ide1 + 0378-037a : parport0 + 037b-037f : parport0 + 03c0-03df : vga+ + 03f6-03f6 : ide0 + 03f8-03ff : serial + ... 
+ + +Accessing I/O ports +------------------- + +After a driver has obtained the desired I/O port range, one can +perform read or write operations on these ports. Since physical ports +are differentiated by the number of bits (8, 16, or 32 bits), there +are different port access functions depending on their size. The +following port access functions are defined in asm/io.h: + + +* *unsigned inb(int port)*, reads one byte (8 bits) from port +* *void outb(unsigned char byte, int port)*, writes one byte (8 bits) to port +* *unsigned inw(int port)*, reads two bytes (16 bits) from port +* *void outw(unsigned short word, int port)*, writes two bytes (16 bits) to port +* *unsigned inl (int port)*, reads four bytes (32 bits) from port +* *void outl(unsigned long word, int port)*, writes four bytes (32 bits) to port + +The port argument specifies the address of the port where the reads or +writes are done, and its type is platform dependent (may be unsigned +long or unsigned short). + +Some devices may have problems when the processor is trying to +transfer data too fast to and from the device. To avoid this issue we +may need to insert a delay after an I/O operation and there are functions +you can use that introduce this delay. Their names are similar to +those described above, with the exception that they end in _p: inb_p, +outb_p, etc. + +For example, the following sequence writes a byte on COM1 serial port +and then reads it: + +.. code-block:: c + + #include <asm/io.h> + #define MY_BASEPORT 0x3F8 + + unsigned char value = 0xFF; + outb(value, MY_BASEPORT); + value = inb(MY_BASEPORT); + +Accessing I/O ports from userspace +------------------------------------- + +Although the functions described above are defined for device drivers, +they can also be used in user space by including the ``<sys/io.h>`` +header. In order to be used, ioperm or iopl must first be called to +get permission to perform port operations.
The ioperm function obtains +permission for individual ports, while iopl obtains it for the entire I/O address +space. To use these features, the user must be root. + +The following sequence used in user space gets permission for the +first 3 ports of the serial port, and then releases them: + +.. code-block:: c + + #include <sys/io.h> + #define MY_BASEPORT 0x3F8 + + if (ioperm(MY_BASEPORT, 3, 1)) { + /* handle error */ + } + + if (ioperm(MY_BASEPORT, 3, 0)) { + /* handle error */ + } + +The third parameter of the ioperm function is used to request or +release port permission: 1 to get permission and 0 to release. + +Interrupt handling +================== + +Requesting an interrupt +----------------------- + +As with other resources, a driver must gain access to an interrupt +line before it can use it and release it at the end of the execution. + +In Linux, the request to obtain and release an interrupt is done using +the :c:func:`request_irq` and :c:func:`free_irq` functions: + +.. code-block:: c + + #include <linux/interrupt.h> + + typedef irqreturn_t (*irq_handler_t)(int, void *); + + int request_irq(unsigned int irq_no, irq_handler_t handler, + unsigned long flags, const char *dev_name, void *dev_id); + + void free_irq(unsigned int irq_no, void *dev_id); + +Note that to get an interrupt, the developer calls +:c:func:`request_irq`. When calling this function you must specify the +interrupt number (*irq_no*), a handler that will be called when the +interrupt is generated (*handler*), flags that will instruct the +kernel about the desired behaviour (*flags*), the name of the device +using this interrupt (*dev_name*), and a pointer that can be +set by the user to any value, and that has no global +significance (*dev_id*). Most of the time, *dev_id* will be a +pointer to the device driver's private data. When the interrupt is +released, using the :c:func:`free_irq` function, the developer must +send the same pointer value (*dev_id*) along with the same interrupt +number (*irq_no*).
The device name (*dev_name*) is used to display +statistics in */proc/interrupts*. + +The value that :c:func:`request_irq` returns is 0 if the entry was +successful or a negative error code indicating the reason for the +failure. A typical value is *-EBUSY* which means that the interrupt +was already requested by another device driver. + +The *handler* function is executed in interrupt context which means +that we can't call blocking APIs such as :c:func:`mutex_lock` or +:c:func:`msleep`. We must also avoid doing a lot of work in the +interrupt handler and instead use deferred work if needed. The actions +performed in the interrupt handler include reading the device +registers to get the status of the device and acknowledge the +interrupt, operations that most of the time can be performed with +non-blocking calls. + +There are situations where although a device uses interrupts we can't +read the device's registers in a non-blocking mode (for example a +sensor connected to an I2C or SPI bus whose driver does not guarantee +that bus read / write operations are non-blocking ). In this +situation, in the interruption, we must plan a work-in-process action +(work queue, kernel thread) to access the device's registers. Because +such a situation is relatively common, the kernel provides the +:c:func:`request_threaded_irq` function to write interrupt handling +routines running in two phases: a process-phase and an interrupt +context phase: + +.. code-block:: c + + #include + + int request_threaded_irq(unsigned int irq, irq_handler_t handler, + irq_handler_t thread_fn, + unsigned long flags, const char *name, void *dev); + +*handler* is the function running in interrupt context, and will +implement critical operations while the thread_fn function runs in +process context and implements the rest of the operations. 
+ +The flags that can be transmitted when an interruption is made are: + +* *IRQF_SHARED* announces the kernel that the interrupt can be + shared with other devices. If this flag is not set, then if there is + already a handler associated with the requested interrupt, the + request for interrupt will fail. A shared interrupt is handled in a + special way by the kernel: all the associated interrupt handlers + will be executed until the device that generated the interrupt will + be identified. But how can a device driver know if the interrupt + handling routine was activated by an interrupt generated by the + device it manages? Virtually all devices that offer interrupt + support have a status register that can be interrogated in the + handling routine to see if the interrupt was or was not generated by + the device (for example, in the case of the 8250 serial port, this + status register is IIR - Interrupt Information Register). When + requesting a shared interrupt, the dev_id argument must be unique + and it must not be NULL. Usually it is set to module's private + data. + +* *IRQF_ONESHOT* interrupt will be reactivated after running the process + context routine; Without this flag, the interrupt will be + reactivated after running the handler routine in the context of + the interrupt + + +Requesting the interrupt can be done either at the initialization of +the driver (:c:func:`init_module`), when the device is probed, or when +the device is used (e.g. during *open*). + +The following example performs the interrupt request for the COM1 +serial port: + +.. code-block:: c + + #include + + #define MY_BASEPORT 0x3F8 + #define MY_IRQ 4 + + static my_init(void) + { + [...] + struct my_device_data *my_data; + int err; + + err = request_irq(MY_IRQ, my_handler, IRQF_SHARED, + "com1", my_data); + if (err < 0) { + /* handle error*/ + return err; + } + [...] + } + +As you can see, the IRQ for serial port COM1 is 4, which is used in +shared mode (IRQF_SHARED). + +.. 
attention:: When requesting a shared interrupt (IRQF_SHARED) the + *dev_id* argument can not be NULL. + +To release the interrupt associated with the serial port, the +following operations will be executed: + +.. code-block:: c + + free_irq (MY_IRQ, my_data); + + +During the initialization function (:c:func:`init_module`), or in the +function that opens the device, interrupts must be activated for the +device. This operation is dependent on the device, but most often +involves setting a bit in the control register. + + +As an example, for the 8250 serial port, the following operations must +be performed to enable interrupts: + +.. code-block:: c + + #include <asm/io.h> + #define MY_BASEPORT 0x3F8 + + outb(0x08, MY_BASEPORT+4); + outb(0x01, MY_BASEPORT+1); + + +In the above example, two operations are performed: + +1. All interrupts are activated by setting bit 3 (Aux Output 2) in + the MCR register - Modem Control Register +2. The RDAI (Received Data Available Interrupt) is activated + by setting the appropriate bit in the IER - Interrupt Enable + Register. + + +Implementing an interrupt handler +--------------------------------- + +Let's take a look at the signature of the interrupt handler function: + +.. code-block:: c + + irqreturn_t (*handler)(int irq_no, void *dev_id); + +The function receives as parameters the number of the interrupt +(*irq_no*) and the pointer sent to :c:func:`request_irq` when the +interrupt was requested. The interrupt handling routine must return a +value with a type of :c:type:`typedef irqreturn_t`. For the current kernel +version, there are three valid values: *IRQ_NONE*, *IRQ_HANDLED*, +and *IRQ_WAKE_THREAD*. The device driver must return *IRQ_NONE* if +it notices that the interrupt has not been generated by the device it +is in charge of. 
Otherwise, the device driver must return *IRQ_HANDLED* +if the interrupt can be handled directly from the interrupt context or +*IRQ_WAKE_THREAD* to schedule the running of the process context +processing function. + +The skeleton for an interrupt handler is: + +.. code-block:: c + + irqreturn_t my_handler(int irq_no, void *dev_id) + { + struct my_device_data *my_data = (struct my_device_data *) dev_id; + + /* if interrupt is not for this device (shared interrupts) */ + /* return IRQ_NONE;*/ + + /* clear interrupt-pending bit */ + /* read from device or write to device*/ + + return IRQ_HANDLED; + } + + +Typically, the first thing executed in the interrupt handler is to +determine whether the interrupt was generated by the device that the +driver ordered. This usually reads information from the device's +registers to indicate whether the device has generated an +interrupt. The second thing is to reset the interrupt pending bit on +the physical device as most devices will no longer generate +interruptions until this bit has been reset (e.g. for the 8250 +serial port bit 0 in the IIR register must be cleared). + + +Locking +------- + +Because the interrupt handlers run in interrupt context the actions +that can be performed are limited: unable to access user space memory, +can't call blocking functions. Also, synchronization using spinlocks is +tricky and can lead to deadlocks if the spinlock used is already +acquired by a process that has been interrupted by the running +handler. + +However, there are cases where device drivers have to synchronize +using interrupts, such as when data is shared between the interrupt +handler and process context or bottom-half handlers. In these +situations it is necessary to both deactivate the interrupt and use +spinlocks. + +There are two ways to disable interrupts: disabling all interrupts, at +the processor level, or disabling a particular interrupt at the device +or interrupt controller level. 
Processor disabling is faster and is +therefore preferred. For this purpose, there are locking functions +that disable and enable interrupts while acquiring and releasing a spinlock at +the same time: :c:func:`spin_lock_irqsave`, +:c:func:`spin_unlock_irqrestore`, :c:func:`spin_lock_irq`, and +:c:func:`spin_unlock_irq`: + +.. code-block:: c + + #include <linux/spinlock.h> + + void spin_lock_irqsave (spinlock_t * lock, unsigned long flags); + void spin_unlock_irqrestore (spinlock_t * lock, unsigned long flags); + + void spin_lock_irq (spinlock_t * lock); + void spin_unlock_irq (spinlock_t * lock); + +The :c:func:`spin_lock_irqsave` function disables interrupts for the +local processor before it obtains the spinlock; the previous state of +the interrupts is saved in *flags*. + +If you are absolutely sure that the interrupts on the current +processor have not already been disabled by someone else and you are +sure you can activate the interrupts when you release the spinlock, +you can use :c:func:`spin_lock_irq`. + +For read / write spinlocks there are similar functions available: + +* :c:func:`read_lock_irqsave` +* :c:func:`read_unlock_irqrestore` +* :c:func:`read_lock_irq` +* :c:func:`read_unlock_irq` +* :c:func:`write_lock_irqsave` +* :c:func:`write_unlock_irqrestore` +* :c:func:`write_lock_irq` +* :c:func:`write_unlock_irq` + +If we want to disable interrupts at the interrupt controller level +(not recommended because disabling a particular interrupt is slower and +we can not disable shared interrupts) we can do this with +:c:func:`disable_irq`, :c:func:`disable_irq_nosync`, and +:c:func:`enable_irq`. Using these functions will disable the interrupts on +all processors. Calls can be nested: if disable_irq is called twice, +it will require as many calls to enable_irq to enable it. The difference +between disable_irq and disable_irq_nosync is that the first one will +wait for the executed handlers to finish. 
Because of this, +:c:func:`disable_irq_nosync` is generally faster, but may lead to +races with the interrupts handler, so when not sure use +:c:func:`disable_irq`. + +The following sequence disables and then enables the interrupt for +the COM1 serial port: + +.. code-block:: c + + #define MY_IRQ 4 + + disable_irq (MY_IRQ); + enable_irq (MY_IRQ); + +It is also possible to disable interrupts at the device level. This +approach is also slower than disabling interrupts at the processor +level, but it works with shared interrupts. The way to accomplish this +is device specific and it usually means we have to clear a bit from +one of the control registers. + +It is also possible to disable all interrupts for the current +processor independent of taking locks. Disabling all interruptions by +device drivers for synchronization purposes is inappropriate because +races are still possible if the interrupt is handled on another +CPU. For reference, the functions that disable / enable interrupts on +the local processor are :c:func:`local_irq_disable` and +:c:func:`local_irq_enable`. + +In order to use a resource shared between process context and the +interrupt handling routine, the functions described above will be used +as follows: + +.. code-block:: c + + static spinlock_t lock; + + /* IRQ handling routine: interrupt context */ + irqreturn_t kbd_interrupt_handle(int irq_no, void * dev_id) + { + ... + spin_lock(&lock); + /* Critical region - access shared resource */ + spin_unlock (&lock); + ... + } + + /* Process context: Disable interrupts when locking */ + static void my_access(void) + { + unsigned long flags; + + spin_lock_irqsave(&lock, flags); + /* Critical region - access shared resource */ + spin_unlock_irqrestore(&lock, flags); + + ... + } + + void my_init (void) + { + ... + spin_lock_init (&lock); + ... + } + + +The *my_access function* above runs in process context. 
To +synchronize access to the shared data, we disable the interrupts and +use the spinlock *lock*, i.e. the :c:func:`spin_lock_irqsave` and +:c:func:`spin_unlock_irqrestore` functions. + +In the interrupt handling routine, we use the :c:func:`spin_lock` and +:c:func:`spin_unlock` functions to access the shared resource. + +.. note:: The *flags* argument for :c:func:`spin_lock_irqsave` and + :c:func:`spin_unlock_irqrestore` is a value and not a pointer but keep + in mind that the :c:func:`spin_lock_irqsave` function changes the value of + *flags*, since this is actually a macro. + +Interrupt statistics +-------------------- + +Information and statistics about system interrupts can be found in +*/proc/interrupts* or */proc/stat*. Only system interrupts with +associated interrupt handlers appear in */proc/interrupts*: + +.. code-block:: shell + + # cat /proc/interrupts + CPU0 + 0: 7514294 IO-APIC-edge timer + 1: 4528 IO-APIC-edge i8042 + 6: 2 IO-APIC-edge floppy + 8: 1 IO-APIC-edge rtc + 9: 0 IO-APIC-level acpi + 12: 2301 IO-APIC-edge i8042 + 15: 41 IO-APIC-edge ide1 + 16: 3230 IO-APIC-level ioc0 + 17: 1016 IO-APIC-level vmxnet ether + NMI: 0 + LOC: 7229438 + ERR: 0 + MIS: 0 + +The first column specifies the IRQ associated with the interrupt. The +following column shows the number of interrupts that were generated +for each processor in the system; the last two columns provide +information about the interrupt controller and the device name that +registered the handler for that interrupt. + +The */proc/stat* file provides information about system activity, +including the number of interrupts generated since the last (re)boot +of the system: + +.. 
code-block:: shell + + # cat /proc/stat | grep in + intr 7765626 7754228 4620 0 0 0 0 2 0 1 0 0 0 2377 0 0 41 3259 1098 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +Each line in the */proc/state* file begins with a keyword that +specifies the meaning of the information on the line. For information +on interrupts, this keyword is intr. The first number on the line +represents the total number of interrupts, and the other numbers +represent the number of interrupts for each IRQ, starting at 0. The +counter includes the number of interrupts for all processors in the +system. + + +Further reading +=============== + +Serial Port +----------- + +* `Serial Port `_ +* `Interfacing the Serial / RS232 Port `_ + + +Parallel port +------------- + +* `Interfacing the Standard Parallel Port `_ +* `Parallel Port Central `_ + +Keyboard controller +------------------- + +* `Intel 8042 `_ +* drivers/input/serio/i8042.c +* drivers/input/keyboard/atkbd.c + +Linux device drivers +-------------------- + +* `Linux Device Drivers, 3rd ed., Ch. 9 - Communicating with Hardware `_ +* `Linux Device Drivers, 3rd ed., Ch. 10 - Interrupt Handling `_ +* `Interrupt Handlers `_ + + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: interrupts + +0. Intro +-------- + +Using |LXR|_, find the definitions of the following symbols in the Linux kernel: + +* :c:type:`struct resource` +* :c:func:`request_region` and :c:func:`__request_region` +* :c:func:`request_irq` and :c:func:`request_threaded_irq` +* :c:func:`inb` for the x86 architecture. 
+ +Analyze the following Linux code: + +* Keyboard initialization function :c:func:`i8042_setup_kbd` +* The AT or PS/2 keyboard interrupt function :c:func:`atkbd_interrupt` + +Keyboard driver +------------------ + +The next exercise's objective is to create a driver that uses the +keyboard IRQ, inspects the incoming key codes and stores them in a +buffer. The buffer will be accessible from userspace via a character +device driver. + +1. Request the I/O ports +------------------------ + +To start with, we aim to allocate memory in the I/O space for hardware +devices. We will see that we cannot allocate space for the keyboard +because the designated region is already allocated. Then we will allocate +I/O space for unused ports. + +The *kbd.c* file contains a skeleton for the keyboard driver. Browse +the source code and inspect :c:func:`kbd_init`. Notice that the I/O +ports we need are I8042_STATUS_REG and I8042_DATA_REG. + +Follow the sections marked with **TODO 1** in the skeleton. Request the I/O +ports in :c:func:`kbd_init` and make sure to check for errors and to properly +clean-up in case of errors. When requesting, set the reserving caller's ID +string (``name``) with the ``MODULE_NAME`` macro. Also, add code to release the I/O +ports in :c:func:`kbd_exit`. + +.. note:: You can review the `Request access to I/O ports`_ section before + proceeding. + +Now build the module and copy it to the VM image: + +.. code-block:: shell + + tools/labs $ make build + tools/labs $ make copy + + +Now start the VM and insert the module: + +.. code-block:: shell + + root@qemux86:~# insmod skels/interrupts/kbd.ko + kbd: loading out-of-tree module taints kernel. + insmod: can't insert 'skels/interrupts/kbd.ko': Device or resource busy + +Notice that you get an error when trying to request the I/O +ports. This is because we already have a driver that has requested the +I/O ports. To validate, check the :file:`/proc/ioports` file for the +``STATUS_REG`` and ``DATA_REG`` values: + +.. 
code-block:: shell + + root@qemux86:~# cat /proc/ioports | egrep "(0060|0064)" + 0060-0060 : keyboard + 0064-0064 : keyboard + + +Let's find out which driver registers these ports and try to remove the +module associated with it. + +.. code-block:: shell + + $ find -name \*.c | xargs grep \"keyboard\" + + find -name \*.c | xargs grep \"keyboard\" | egrep '(0x60|0x64)' + ... + ./arch/x86/kernel/setup.c:{ .name = "keyboard", .start = 0x60, .end = 0x60, + ./arch/x86/kernel/setup.c:{ .name = "keyboard", .start = 0x64, .end = 0x64 + +It looks like the I/O ports are registered by the kernel during the +boot, and we won't be able to remove the associated module. Instead, +let's trick the kernel and register ports 0x61 and 0x65. + +Use the function :c:func:`request_region` (inside the :c:func:`kbd_init` +function) to allocate the ports and the function :c:func:`release_region` +(inside the :c:func:`kbd_exit` function) to release the allocated ports. + +This time we can load the module and */proc/ioports* shows that the +owner of these ports is our module: + +.. code-block:: shell + + root@qemux86:~# insmod skels/interrupts/kbd.ko + kbd: loading out-of-tree module taints kernel. + Driver kbd loaded + root@qemux86:~# cat /proc/ioports | grep kbd + 0061-0061 : kbd + 0065-0065 : kbd + +Let's remove the module and check that the I/O ports are released: + +.. code-block:: shell + + root@qemux86:~# rmmod kbd + Driver kbd unloaded + root@qemux86:~# cat /proc/ioports | grep kbd + root@qemux86:~# + +2. Interrupt handling routine +----------------------------- + +For this task we will implement and register an interrupt handler for +the keyboard interrupt. You can review the `Requesting an interrupt`_ +section before proceeding. + +Follow the sections marked with **TODO 2** in the skeleton. + +First, define an empty interrupt handling routine named +:c:func:`kbd_interrupt_handler`. + +.. 
note:: Since we already have a driver that uses this interrupt we + should report the interrupt as not handled (i.e. return + :c:type:`IRQ_NONE`) so that the original driver still has a + chance to process it. + +Then register the interrupt handler routine using +:c:func:`request_irq`. The interrupt number is defined by the +`I8042_KBD_IRQ` macro. The interrupt handling routine must be +requested with :c:type:`IRQF_SHARED` to share the interrupt line with +the keyboard driver (i8042). + +.. note:: For shared interrupts, *dev_id* can not be NULL. Use + ``&devs[0]``, that is a pointer to :c:type:`struct kbd`. This + structure contains all the information needed for device + management. To see the interrupt in */proc/interrupts*, do + not use NULL for *dev_name*. You can use the MODULE_NAME + macro. + + If requesting the interrupt fails, make sure to properly + cleanup by jumping to the right label, in this case the one + that releases the I/O ports and continues with unregistering + the character device driver. + +Compile, copy and load the module into the kernel. Check that the interrupt +line has been registered by looking at */proc/interrupts*. Determine +the IRQ number from the source code (see `I8042_KBD_IRQ`) and verify +that there are two drivers registered at this interrupt line (which +means that we have a shared interrupt line): the i8042 initial driver +and our driver. + +.. note:: More details about the format of */proc/interrupts* can + be found in the `Interrupt statistics`_ section. + +Print a message inside the routine to make sure it is called. Compile +and reload the module into the kernel. Check that the interrupt handling +routine is called when you press the keyboard on the virtual machine, +using :command:`dmesg`. Also note that when you use the serial port no +keyboard interrupt is generated. + +.. attention:: To get access to the keyboard on the virtual machine + boot with "QEMU_DISPLAY=gtk make boot". + +3. 
Store ASCII keys to buffer +----------------------------- + +Next, we want to collect the keystrokes in a buffer whose content we +will then send to user space. For this routine we will add the +following in the interrupt handling: + +* capture the pressed keys (only pressed, ignore released) +* identify the ASCII characters. +* copy the ASCII characters corresponding to the keystrokes and store + them in the buffer of the device + +Follow the sections marked **TODO 3** in the skeleton. + +Reading the data register +......................... + +First, fill in the :c:func:`i8042_read_data` function to read the +``I8042_DATA_REG`` of the keyboard controller. The function +just needs to return the value of the register. The value of the +register is also called scancode, which is what is generated at each +keystroke. + +.. hint:: Read the ``I8042_DATA_REG`` register using :c:func:`inb` and + store the value in the local variable :c:type:`val`. + Revisit the `Accessing I/O ports`_ section. + +Call the :c:func:`i8042_read_data` in the +:c:func:`kbd_interrupt_handler` and print the value read. + +Print information about the keystrokes in the following format: + +.. code-block:: c + + pr_info("IRQ:% d, scancode = 0x%x (%u,%c)\n", + irq_no, scancode, scancode, scancode); + + +Where scancode is the value of the register read using the +:c:func:`i8042_read_data` function. + +Notice that the scancode (the value read from the data register) is not the ASCII +character of the pressed key. We'll have to understand the scancode. + +Interpreting the scancode +......................... + +Note that the register value is a scancode, not the ASCII value of the +character pressed. Also note that an interrupt is sent both when the +key is pressed and when the key is released. We only need to select +the code when the key is pressed and then decode the ASCII +character. + +.. note:: To check the scancode, we can use the showkey command (showkey + -s). 
+ + In this form, the command will display the key scancodes for + 10 seconds after the last pressed key and then it will + stop. If you press and release a key you will get two + scancodes: one for the pressed key and one for the released + key. E.g.: + + * If you press the ENTER key, you will get the 0x1c (key pressed) + and 0x9c (for the released key) + * If you press the key a you will get the 0x1e (key pressed) + and 0x9e (for the key release) + * If you press b you will get 0x30 (key pressed) and 0xb0 + (for the release key) + * If you press the c key, you will get the 0x2e (key + pressed) and 0xae (for the released key) + * If you press the Shift key you will get the 0x2a (key + pressed) and 0xaa (for the released key) + * If you press the Ctrl key you will get the 0x1d (key + pressed) and 0x9d (for the release key) + + As also indicated in this `article + `_, a key + release scancode is 128 (0x80) higher than a key press + scancode. This is how we can distinguish between a press + key scancode and a release scancode. + + A scancode is translated into a keycode that matches a + key. A key press scancode and a key release scancode + have the same keycode. For the keys shown above we have + the following table: + + .. flat-table:: + + * - Key + - Key Press Scancode + - Key Release Scancode + - Keycode + + * - ENTER + - 0x1c + - 0x9c + - 0x1c (28) + + * - a + - 0x1e + - 0x9e + - 0x1e (30) + + * - b + - 0x30 + - 0xb0 + - 0x30 (48) + + * - c + - 0x2e + - 0xae + - 0x2e (46) + + * - Shift + - 0x2a + - 0xaa + - 0x2a (42) + + * - Ctrl + - 0x1d + - 0x9d + - 0x1d (29) + + The press / release check is performed in the is_key_press() + function and obtaining the ASCII character of a scancode + takes place in the get_ascii() function. + +In the interrupt handler check the scancode to see if the key is +pressed or released then determine the corresponding ASCII +character. + +.. hint:: To check for press / release, use :c:func:`is_key_press`. 
+ Use :c:func:`get_ascii` function to get the corresponding + ASCII code. Both functions expect the scancode. + + +.. hint:: To display the received information use the following + format. + + .. code-block:: c + + pr_info("IRQ %d: scancode=0x%x (%u) pressed=%d ch=%c\n", + irq_no, scancode, scancode, pressed, ch); + + Where scancode is the value of the data register, and ch is + the value returned by the get_ascii() function. + +Store characters to the buffer +............................... + +We want to collect the pressed characters (not the other keys) into +a circular buffer that can be consumed from user space. + +Update the interrupt handler to add a pressed ASCII character to the +end of the device buffer. If the buffer is full, the character will be +discarded. + +.. hint:: The device buffer is the field :c:type:`buf` in the device's + :c:type:`struct kbd`. To get the device data from the interrupt handler + use the following construct: + + .. code-block:: c + + struct kbd *data = (struct kbd *) dev_id; + + The buffer's dimension is located in :c:type:`struct kbd`'s field, + :c:type:`count`. The :c:type:`put_idx` and :c:type:`get_idx` fields + specify the next writing and reading index. Take a look at the + :c:func:`put_char` function's implementation to observe how the data is + added to the circular buffer. + +.. attention:: Synchronize the access to the buffer and the helper + indexes with a spinlock. + Define the spinlock in the device struct :c:type:`struct kbd` + and initialize it in :c:func:`kbd_init`. + + Use the :c:func:`spin_lock` and :c:func:`spin_unlock` functions + to protect the buffer in the interrupt handler. + + Revisit the `Locking`_ section. + +4. Reading the buffer +---------------------- + +In order to have access to the keylogger's data, we have to send it to +the user space. We will do this using the */dev/kbd* character device. 
When +reading from this device, we will get the data from the buffer in the kernel +space, where we collected the keys pressed. + +For this step +follow the sections marked with **TODO 4** in the :c:func:`kbd_read` function. + +Implement :c:func:`get_char` in a similar way to :c:func:`put_char`. Be careful +when implementing the circular buffer. + +In the :c:func:`kbd_read` function copy the data from the buffer to the +userspace buffer. + +.. hint:: Use :c:func:`get_char` to read a character from the buffer + and :c:func:`put_user` to store it to the user buffer. + +.. attention:: In the read function, use :c:func:`spin_lock_irqsave` and + :c:func:`spin_unlock_irqrestore` for locking. + + Revisit the `Locking`_ section. + +.. attention:: We cannot use :c:func:`put_user` or :c:func:`copy_to_user` + while holding the lock, as userspace access is not permitted from + atomic contexts. + + For more info, read the :ref:`Access to the address space of the + process section <_access_to_process_address_space>` in the + previous lab. + +For testing, you will need to create the */dev/kbd* character device +file using the mknod command before reading from it. The device major and +minor are defined as ``KBD_MAJOR`` and ``KBD_MINOR``: + +.. code-block:: shell + + mknod /dev/kbd c 42 0 + +Build, copy and boot the virtual machine and load the module. Test it +using the command: + +.. code-block:: shell + + cat /dev/kbd + + +5. Reset the buffer +------------------- + +Reset the buffer if the device is written to. For this step follow the +sections marked with **TODO 5** in the skeleton. + +Implement :c:func:`reset_buffer` and add the write operation to *kbd_fops*. + +.. attention:: In the write function, use :c:func:`spin_lock_irqsave` and + :c:func:`spin_unlock_irqrestore` for locking when resetting the + buffer. + + Revisit the `Locking`_ section. + +For testing, you will need to create the */dev/kbd* character device +file using the mknod command before reading from it. 
The device major and +minor are defined as ``KBD_MAJOR`` and ``KBD_MINOR``: + +.. code-block:: shell + + mknod /dev/kbd c 42 0 + +Build, copy and boot the virtual machine and load the module. +Test it using the command: + +.. code-block:: shell + + cat /dev/kbd + +Press some keys, then run the command :command:`echo "clear" > /dev/kbd`. +Check the buffer's content again. It should be reset. + +Extra Exercises +=============== + +1. kfifo +--------- + +Implement a keylogger using the +`kfifo API `_. + +.. hint:: Follow the `API call examples from the kernel code `_. + For example, the file `bytestream-examples.c `_. diff --git a/Documentation/teaching/labs/introduction.rst b/Documentation/teaching/labs/introduction.rst new file mode 100644 index 00000000000000..98e3a6302e3b26 --- /dev/null +++ b/Documentation/teaching/labs/introduction.rst @@ -0,0 +1,887 @@ +============ +Introduction +============ + +Lab objectives +============== + +* presenting the rules and objectives of the Operating Systems 2 lab +* introducing the lab documentation +* introducing the Linux kernel and related resources + +Keywords +======== + +* kernel, kernel programming +* Linux, vanilla, http://www.kernel.org +* cscope, LXR +* gdb, /proc/kcore, addr2line, dump\_stack + +.. + _[SECTION-ABOUT-BEGIN] + +About this laboratory +===================== + +The Operating Systems 2 lab is a kernel programming and driver development lab. +The objectives of the laboratory are: + +* deepening the notions presented in the course +* presenting the kernel programming interfaces (kernel API) +* gaining documentation, development and debugging skills on a freestanding + environment +* acquiring knowledge and skills for driver development + +A laboratory will present a set of concepts, applications and commands +specific to a given problem. The lab will start with a presentation +(each lab will have a set of slides) (15 minutes) and the remaining +time will be allocated to the lab exercises (80 minutes). 
+ +For best laboratory performance, we recommend that you read the related slides. +To fully understand a laboratory, we recommend going through the lab support. For +in-depth study, use the supporting documentation. + +.. + _[SECTION-ABOUT-END] + +.. + _[SECTION-REFERENCES-BEGIN] + +References +========== + +- Linux + + - `Linux Kernel Development, 3rd + Edition `__ + - `Linux Device Drivers, 3rd + Edition `__ + - `Essential Linux Device + Drivers `__ + +- General + + - `mailing list `__ + (`searching the mailing list `__) + +.. + _[SECTION-REFERENCES-END] + +.. + _[SECTION-CODE-NAVIGATION-BEGIN] + +Source code navigation +====================== + +.. _cscope_intro: + +cscope +------ + +`Cscope `__ is a tool for +efficient navigation of C sources. To use it, a cscope database must +be generated from the existing sources. In a Linux tree, the command +:command:`make ARCH=x86 cscope` is sufficient. Specification of the +architecture through the ARCH variable is optional but recommended; +otherwise, some architecture dependent functions will appear multiple +times in the database. + +You can build the cscope database with the command :command:`make +ARCH=x86 COMPILED_SOURCE=1 cscope`. This way, the cscope database will +only contain symbols that have already been used in the compile +process before, thus resulting in better performance when searching +for symbols. + +Cscope can also be used as stand-alone, but it is more useful when +combined with an editor. To use cscope with :command:`vim`, it is necessary to +install both packages and add the following lines to the file +:file:`.vimrc` (the machine in the lab already has the settings): + +.. code-block:: vim + + if has("cscope") + " Look for a 'cscope.out' file starting from the current directory, + " going up to the root directory. + let s:dirs = split(getcwd(), "/") + while s:dirs != [] + let s:path = "/" . join(s:dirs, "/") + if (filereadable(s:path . "/cscope.out")) + execute "cs add " . s:path . 
"/cscope.out " . s:path . " -v" + break + endif + let s:dirs = s:dirs[:-2] + endwhile + + set csto=0 " Use cscope first, then ctags + set cst " Only search cscope + set csverb " Make cs verbose + + nmap ``s :cs find s ``=expand("``")```` + nmap ``g :cs find g ``=expand("``")```` + nmap ``c :cs find c ``=expand("``")```` + nmap ``t :cs find t ``=expand("``")```` + nmap ``e :cs find e ``=expand("``")```` + nmap ``f :cs find f ``=expand("``")```` + nmap ``i :cs find i ^``=expand("``")``$`` + nmap ``d :cs find d ``=expand("``")```` + nmap :cnext + nmap :cprev + + " Open a quickfix window for the following queries. + set cscopequickfix=s-,c-,d-,i-,t-,e-,g- + endif + +The script searches for a file called :file:`cscope.out` in the current directory, or +in parent directories. If :command:`vim` finds this file, you can use the shortcut :code:`Ctrl +]` +or :code:`Ctrl+\ g` (the combination control-\\ followed by g) to jump directly to +the definition of the word under the cursor (function, variable, structure, etc.). +Similarly, you can use :code:`Ctrl+\ s` to go where the word under the cursor is used. + +You can take a cscope-enabled :file:`.vimrc` file (also contains other goodies) from +https://github.com/ddvlad/cfg/blob/master/\_vimrc. +The following guidelines are based on this file, but also show basic :command:`vim` commands +that have the same effect. + +If there are more than one results (usually there are) you can move between them +using :code:`F6` and :code:`F5` (:code:`:ccnext` and :code:`:cprev`). +You can also open a new panel showing the results using :code:`:copen`. To close +the panel, use the :code:`:cclose` command. + +To return to the previous location, use :code:`Ctrl+o` (o, not zero). +The command can be used multiple times and works even if cscope changed the +file you are currently editing. + +To go to a symbol definition directly when :command:`vim` starts, use :code:`vim -t ` +(for example :code:`vim -t task_struct`). 
Otherwise, if you started :command:`vim` and want +to search for a symbol by name, use :code:`cs find g <symbol_name>` (for example +:code:`cs find g task_struct`). + +If you found more than one result and opened a panel showing all the matches +(using :code:`:copen`) and you want to find a symbol of type structure, +it is recommended to search in the results panel (using :code:`/` -- slash) +the character :code:`{` (opening brace). + +.. important:: + You can get a summary of all the :command:`cscope` commands using :command:`:cs help`. + + For more info, use the :command:`vim` built-in help command: :command:`:h cscope` or :command:`:h copen`. + +If you use :command:`emacs`, install the :code:`xcscope-el` package and +add the following lines in :file:`~/.emacs`. + +.. code-block:: vim + + (require 'xcscope) + (cscope-setup) + +These commands will activate cscope for the C and C++ modes automatically. +:code:`C-c s` is the key bindings prefix and :code:`C-c s s` is used to +search for a symbol (if you call it when the cursor is over a word, +it will use that). For more details, check `https://github.com/dkogan/xcscope.el` + +clangd +------ + +`Clangd `__ is a language server that provides tools +for navigating C and C++ code. +`Language Server Protocol `__ +facilitates features like go-to-definition, find-references, hover, completion, etc., +using semantic whole project analysis. + +Clangd requires a compilation database to understand the kernel source code. +It can be generated with: + +.. code-block:: bash + + make defconfig + make + scripts/clang-tools/gen_compile_commands.py + +LSP clients: + +- Vim/Neovim (`coc.nvim `__, `nvim-lsp `__, `vim-lsc `__, `vim-lsp `__) +- Emacs (`lsp-mode `__) +- VSCode (`clangd extension `__) + +Kscope +------ + +For a simpler interface, `Kscope `__ +is a cscope frontend which uses Qt. It is lightweight, very fast and very +easy to use. It allows searching using regular expressions, call graphs, etc. +Kscope is no longer maintained. 
+ +There is also a `port `__ +of version 1.6 for Qt4 and KDE 4 which keeps the integration of the text +editor Kate and is easier to use than the last version on SourceForge. + +LXR Cross-Reference +------------------- + +LXR (LXR Cross-Reference) is a tool that allows indexing and +referencing the symbols in the source code of a program using +a web interface. The web interface shows links to +locations in files where a symbol is defined or used. Development website +for LXR is http://sourceforge.net/projects/lxr. Similar tools +are `OpenGrok `__ and +`Gonzui `__. + +Although LXR was originally intended for the Linux kernel sources, it is +also used in the sources of `Mozilla `__, +`Apache HTTP Server `__ and +`FreeBSD `__. + +There are a number of sites that use LXR for cross-referencing the +sources of the Linux kernel, the main site being `the original site of +development `__ which does not work anymore. You can +use `https://elixir.bootlin.com/ `__. + +LXR allows searching by identifier (symbol), by free text +or by file name. The main feature and, at the same +time, the main advantage provided is the ease of finding the declaration +of any global identifier. This way, it facilitates quick access to function +declarations, variables, macro definitions and the code can be easily +navigated. Also, the fact that it can detect what code areas are affected +when a variable or function is changed is a real advantage in the development +and debugging phase. + +SourceWeb +--------- + +`SourceWeb `__ is a source code indexer +for C and C++. It uses the +`framework `__ +provided by the Clang compiler to index the code. + +The main difference between cscope and SourceWeb is the fact that SourceWeb +is, in a way, a compiler pass. SourceWeb doesn't index all the code, but +only the code that was actually compiled by the compiler.
This way, some +problems are eliminated, such as ambiguities about which variant of a function +defined in multiple places is used. This also means that the indexing takes +more time, because the compiled files must pass one more time through +the indexer to generate the references. + +Usage example: + +.. code-block:: bash + + make oldconfig + sw-btrace make -j4 + sw-btrace-to-compile-db + sw-clang-indexer --index-project + sourceweb index + +:file:`sw-btrace` is a script that adds the :file:`libsw-btrace.so` +library to :code:`LD_PRELOAD`. This way, the library is loaded by +every process started by :code:`make` (basically, the compiler), +registers the commands used to start the processes and generates +a file called :file:`btrace.log`. This file is then used by +:code:`sw-btrace-to-compile-db` which converts it to a format defined +by clang: `JSON Compilation Database `__. +The JSON Compilation Database resulting from the above steps is then +used by the indexer, which makes one more pass through the compiled +source files and generates the index used by the GUI. + +Word of advice: don't index the sources you are working with, but use +a copy, because SourceWeb doesn't have, at this moment, the capability +to regenerate the index for a single file and you will have to regenerate +the complete index. + +.. + _[SECTION-CODE-NAVIGATION-END] + +.. + _[SECTION-DEBUGGING-BEGIN] + +Kernel Debugging +================ + +Debugging a kernel is a much more difficult process than the debugging +of a program, because there is no support from the operating system. +This is why this process is usually done using two computers, connected +on serial interfaces. + +.. _gdb_intro: + +gdb (Linux) +----------- + +A simpler debug method on Linux, but with many disadvantages, +is local debugging, using `gdb `__, +the uncompressed kernel image (:file:`vmlinux`) and :file:`/proc/kcore` +(the real-time kernel image).
This method is usually used to inspect +the kernel and detect certain inconsistencies while it runs. The +method is useful especially if the kernel was compiled using the +:code:`-g` option, which keeps debug information. Some well-known +debug techniques can't be used by this method, such as breakpoints +of data modification. + +.. note:: Because :file:`/proc` is a virtual filesystem, :file:`/proc/kcore` + does not physically exist on the disk. It is generated on-the-fly + by the kernel when a program tries to access :file:`/proc/kcore`. + + It is used for debugging purposes. + + From :command:`man proc`, we have: + + :: + + /proc/kcore + This file represents the physical memory of the system and is stored in the ELF core file format. With this pseudo-file, and + an unstripped kernel (/usr/src/linux/vmlinux) binary, GDB can be used to examine the current state of any kernel data struc‐ + tures. + +The uncompressed kernel image offers information about the data structures +and symbols it contains. + +.. code-block:: bash + + student@eg106$ cd ~/src/linux + student@eg106$ file vmlinux + vmlinux: ELF 32-bit LSB executable, Intel 80386, ... + student@eg106$ nm vmlinux | grep sys_call_table + c02e535c R sys_call_table + student@eg106$ cat System.map | grep sys_call_table + c02e535c R sys_call_table + +The :command:`nm` utility is used to show the symbols in an object or +executable file. In our case, :file:`vmlinux` is an ELF file. Alternately, +we can use the file :file:`System.map` to view information about the +symbols in kernel. + +Then we use :command:`gdb` to inspect the symbols using the uncompressed +kernel image. A simple :command:`gdb` session is the following: + +.. code-block:: bash + + student@eg106$ cd ~/src/linux + student@eg106$ gdb --quiet vmlinux + Using host libthread_db library "/lib/tls/libthread_db.so.1".
+ (gdb) x/x 0xc02e535c + 0xc02e535c ``: 0xc011bc58 + (gdb) x/16 0xc02e535c + 0xc02e535c ``: 0xc011bc58 0xc011482a 0xc01013d3 0xc014363d + 0xc02e536c ``: 0xc014369f 0xc0142d4e 0xc0142de5 0xc011548b + 0xc02e537c ``: 0xc0142d7d 0xc01507a1 0xc015042c 0xc0101431 + 0xc02e538c ``: 0xc014249e 0xc0115c6c 0xc014fee7 0xc0142725 + (gdb) x/x sys_call_table + 0xc011bc58 ``: 0xffe000ba + (gdb) x/x &sys_call_table + 0xc02e535c ``: 0xc011bc58 + (gdb) x/16 &sys_call_table + 0xc02e535c ``: 0xc011bc58 0xc011482a 0xc01013d3 0xc014363d + 0xc02e536c ``: 0xc014369f 0xc0142d4e 0xc0142de5 0xc011548b + 0xc02e537c ``: 0xc0142d7d 0xc01507a1 0xc015042c 0xc0101431 + 0xc02e538c ``: 0xc014249e 0xc0115c6c 0xc014fee7 0xc0142725 + (gdb) x/x sys_fork + 0xc01013d3 ``: 0x3824548b + (gdb) disass sys_fork + Dump of assembler code for function sys_fork: + 0xc01013d3 ``: mov 0x38(%esp),%edx + 0xc01013d7 ``: mov $0x11,%eax + 0xc01013dc ``: push $0x0 + 0xc01013de ``: push $0x0 + 0xc01013e0 ``: push $0x0 + 0xc01013e2 ``: lea 0x10(%esp),%ecx + 0xc01013e6 ``: call 0xc0111aab `` + 0xc01013eb ``: add $0xc,%esp + 0xc01013ee ``: ret + End of assembler dump. + +It can be noticed that the uncompressed kernel image was used as an argument +for :command:`gdb`. The image can be found in the root of the kernel sources +after compilation. + +A few commands used for debugging using :command:`gdb` are: + +- :command:`x` (examine) - Used to show the contents of the memory area + whose address is specified as an argument to the command (this address + can be the value of a physical address, a symbol or the address of a + symbol). It can take as arguments (preceded by :code:`/`): the format + to display the data in (:code:`x` for hexadecimal, :code:`d` for + decimal, etc.), how many memory units to display and the size of a + memory unit. + +- :command:`disassemble` - Used to disassemble a function. + +- :command:`p` (print) - Used to evaluate and show the value of an + expression. 
The format to show the data in can be specified as + an argument (:code:`/x` for hexadecimal, :code:`/d` for decimal, etc.). + +The analysis of the kernel image is a method of static analysis. If we +want to perform dynamic analysis (analyzing how the kernel runs, not +only its static image) we can use :file:`/proc/kcore`; this is a dynamic +image (in memory) of the kernel. + +.. code-block:: bash + + student@eg106$ gdb ~/src/linux/vmlinux /proc/kcore + Core was generated by `root=/dev/hda3 ro'. + #0 0x00000000 in ?? () + (gdb) p sys_call_table + $1 = -1072579496 + (gdb) p /x sys_call_table + $2 = 0xc011bc58 + (gdb) p /x &sys_call_table + $3 = 0xc02e535c + (gdb) x/16 &sys_call_table + 0xc02e535c ``: 0xc011bc58 0xc011482a 0xc01013d3 0xc014363d + 0xc02e536c ``: 0xc014369f 0xc0142d4e 0xc0142de5 0xc011548b + 0xc02e537c ``: 0xc0142d7d 0xc01507a1 0xc015042c 0xc0101431 + 0xc02e538c ``: 0xc014249e 0xc0115c6c 0xc014fee7 0xc0142725 + +Using the dynamic image of the kernel is useful for detecting `rootkits `__. + +- `Linux Device Drivers 3rd Edition - Debuggers and Related Tools `__ +- `Detecting Rootkits and Kernel-level Compromises in Linux `__ +- `User-Mode Linux `__ + +Getting a stack trace +--------------------- + +Sometimes, you will want information about the path by which execution +reaches a certain point. You can determine this information using +:command:`cscope` or LXR, but some functions are called from many +execution paths, which makes this method difficult. + +In these situations, it is useful to get a stack trace, which can be +simply done using the function :code:`dump_stack()`. + +.. + _[SECTION-DEBUGGING-END] + +.. + _[SECTION-DOCUMENTATION-BEGIN] + +Documentation +============= + +Kernel development is a difficult process, compared to user space +programming. The API is different and the complexity of the subsystems +in kernel requires additional preparation.
The associated documentation +is heterogeneous, sometimes requiring the inspection of multiple sources +to have a more complete understanding of a certain aspect. + +The main advantages of the Linux kernel are the access to sources and +the open development system. Because of this, the Internet offers a +large amount of documentation for the kernel. + +A few links related to the Linux kernel are shown below: + +- `KernelNewbies `__ +- `KernelNewbies - Kernel Hacking `__ +- `Kernel Analysis - HOWTO `__ +- `Linux Kernel Programming `__ +- `Linux kernel - Wikibooks `__ + +The links are not comprehensive. Using `The Internet `__ and +`kernel source code `__ is essential. + +.. + _[SECTION-DOCUMENTATION-END] + +Exercises +========= + +.. + _[SECTION-EXERCISES-REMARKS-BEGIN] + +Remarks +------- + +.. note:: + + - Usually, the steps used to develop a kernel module are the + following: + + - editing the module source code (on the physical machine); + - module compilation (on the physical machine); + - generation of the minimal image for the virtual machine; + this image contains the kernel, your module, busybox and + possibly test programs; + - starting the virtual machine using QEMU; + - running the tests in the virtual machine. + + - When using cscope, use :file:`~/src/linux`. + If there is no :file:`cscope.out` file, you can generate it using + the command :command:`make ARCH=x86 cscope`. + + - You can find more details about the virtual machine at + :ref:`vm_link`. + +.. important:: + Before solving an exercise, **carefully** read all its bullets. + +.. + _[SECTION-EXERCISES-REMARKS-END] + +.. + _[EXERCISE1-BEGIN] + +Booting the virtual machine +--------------------------- + +A summary of the virtual machine infrastructure: + +- :file:`~/src/linux` - Linux kernel sources, needed to + compile modules. The directory contains the file :file:`cscope.out`, + used for navigation in the source tree.
+ +- :file:`~/src/linux/tools/labs/qemu`- scripts and auxiliary + files used to generate and run the QEMU VM. + +To start the VM, run :command:`make boot` in the directory :file:`~/src/linux/tools/labs`: + +.. code-block:: shell + + student@eg106:~$ cd ~/src/linux/tools/labs + student@eg106:~/src/linux/tools/labs$ make boot + +By default, you will not get a prompt or any graphical interface, but you can connect to +a console exposed by the virtual machine using :command:`minicom` or :command:`screen`. + +.. code-block:: shell + + student@eg106:~/src/linux/tools/labs$ minicom -D serial.pts + + + + qemux86 login: + Poky (Yocto Project Reference Distro) 2.3 qemux86 /dev/hvc0 + +Alternatively, you can start the virtual machine with graphical interface support, using +the :command:`QEMU_DISPLAY=gtk make boot`. + +.. note:: + To access the virtual machine, at the login prompt, enter the + username :code:`root`; there is no need to enter a password. + The virtual machine will start with the permissions of the + root account. + +.. + _[EXERCISE1-END] + +.. + _[EXERCISE2-BEGIN] + +Adding and using a virtual disk +------------------------------- + +.. note:: If you don't have the file :file:`mydisk.img`, you can download + it from the address http://elf.cs.pub.ro/so2/res/laboratoare/mydisk.img. + The file must be placed in :file:`tools/labs`. + +In the :file:`~/src/linux/tools/labs` directory, you have a new virtual +machine disk, in the file :file:`mydisk.img`. We want to add the disk +to the virtual machine and use it within the virtual machine. + +Edit :file:`qemu/Makefile` and add :code:`-drive file=mydisk.img,if=virtio,format=raw` +to the :code:`QEMU_OPTS` variable. + +.. note:: There are already two disks added to qemu (disk1.img and disk2.img). You will need + to add the new one after them. In this case, the new disk can be accessed as + :file:`/dev/vdd` (vda is the root partition, vdb is disk1 and vdc is disk2). + +.. 
hint:: You do not need to manually create the entry for the new disk in :file:`/dev` + because the virtual machine uses :command:`devtmpfs`. + +Run :code:`make` in :file:`tools/labs` to boot the virtual machine. +Create :file:`/test` directory and try to mount the new disk: + +.. code-block:: bash + + mkdir /test + mount /dev/vdd /test + +The reason why we can not mount the virtual disk is because we do not have support in the +kernel for the filesystem with which the :file:`mydisk.img` is formatted. You will need +to identify the filesystem for :file:`mydisk.img` and compile kernel support for that filesystem. + +Close the virtual machine (close the QEMU window, you do not need to use another command). +Use the :command:`file` command on the physical machine to find out with which filesystem +the :file:`mydisk.img` file is formatted. You will identify the :command:`btrfs` file system. + +You will need to enable :command:`btrfs` support in the kernel and recompile the kernel image. + +.. warning:: If you receive an error while executing the :command:`make menuconfig` + command, you probably do not have the :command:`libncurses5-dev` + package installed. Install it using the command: + + :: + + sudo apt-get install libncurses5-dev + +.. hint:: Enter the :file:`~/src/linux/` subdirectory. Run :command:`make menuconfig` + and go to the *File systems* section. Enable *Btrfs filesystem support*. + You will need to use the builtin option (not the module), i.e. :command:`<*>` must appear + next to the option (**not** :command:``). + + Save the configuration you have made. Use the default configuration file (:file:`config`). + + In the kernel source subdirectory (:file:`~/src/linux/`) recompile using the command: + + :: + + make + + To wait less, you can use the :command:`-j` option to run multiple jobs in parallel.
+ Generally, it is recommended to use :command:`number of CPUs+1`: + + :: + + make -j5 + +After the kernel recompilation finishes, **restart** the QEMU virtual machine: +that is, launch the :command:`make` command in the subdirectory. You +do not need to copy anything, because the :file:`bzImage` file is a symlink to the kernel +image you just recompiled. + +Inside the QEMU virtual machine, repeat the :command:`mkdir` and :command:`mount` operations. +With support for the :command:`btrfs` filesystem, now :command:`mount` will finish successfully. + +.. note:: When doing your homework, there is no need to recompile the kernel + because you will only use kernel modules. However, it is important + to be familiar with configuring and recompiling a kernel. + + If you still plan to recompile the kernel, make a backup of the bzImage + file (follow the link in ~/src/linux for the full path). This will allow + you to return to the initial setup in order to have an environment + identical to the one used by vmchecker. + +.. + _[EXERCISE2-END] + +.. + _[EXERCISE3-BEGIN] + +GDB and QEMU +------------ + +We can investigate and troubleshoot the QEMU virtual machine in real time. + +.. note:: You can also use the :command:`GDB Dashboard` plugin for a user-friendly interface. + :command:`gdb` must be compiled with Python support. + + In order to install it, you can just run: + :: + + wget -P ~ git.io/.gdbinit + +To do this, we start the QEMU virtual machine first. Then, we can connect +with :command:`gdb` to **a running QEMU virtual machine** using the command + +:: + + make gdb + +We used the QEMU command with the :command:`-s` parameter, which means +listening to port :code:`1234` from :command:`gdb`. We can do debugging +using a **remote target** for :command:`gdb`. The existing :file:`Makefile` +takes care of the details. + +When you attach a debugger to a process, the process is suspended. +You can add breakpoints and inspect the current status of the process. 
+ +Attach to the QEMU virtual machine (using the :command:`make gdb` command) +and place a breakpoint in the :code:`sys_access` function using the +following command in the :command:`gdb` console: + +:: + + break sys_access + +At this time, the virtual machine is suspended. To continue executing it (up to the possible call +of the :code:`sys_access` function), use the command: + +:: + + continue + +in the :command:`gdb` console. + +At this time, the virtual machine is active and has a usable console. +To make a :code:`sys_access` call, issue a :command:`ls` command. +Note that the virtual machine was again suspended by :command:`gdb` +and the corresponding :code:`sys_access` callback message appeared within the :command:`gdb` console. + +Trace code execution using :command:`step` instruction, :command:`continue` or :command:`next` +instruction. You probably do not understand everything that happens, so use commands +such as :command:`list` and :command:`backtrace` to trace the execution. + +.. hint:: At the :command:`gdb` prompt, you can press :command:`Enter` + (without anything else) to rerun the last command. + +.. + _[EXERCISE3-END] + +.. + _[EXERCISE4-BEGIN] + +4. GDB spelunking +----------------- + +Use :command:`gdb` to display the source code of the function that creates kernel threads +(:code:`kernel_thread`). + +.. note:: You can use GDB for static kernel analysis using, in the kernel source directory, + a command such as: + + :: + + gdb vmlinux + + Go over the `gdb (Linux) <#gdb-linux>`__ section of the lab. + +Use :command:`gdb` to find the address of the :code:`jiffies` variable in memory and its contents. +The :code:`jiffies` variable holds the number of ticks (clock beats) since the system started. + +.. hint:: To track the value of the jiffies variable, use dynamic analysis in :command:`gdb` + by running the command: + + :: + + make gdb + + as in the previous exercise. + + Go over the `gdb (Linux) <#gdb-linux>`__ section of the lab. + +.. 
hint:: The :code:`jiffies` is a 64-bit variable. + You can see that its address is the same as the :code:`jiffies_64` variable. + + To explore the contents of a 64-bit variable, use in the :command:`gdb` console the command: + + :: + + x/gx & jiffies + + If you wanted to display the contents of the 32-bit variable, + you would use in the :command:`gdb` console the command: + + :: + + x/wx & jiffies + +.. + _[EXERCISE4-END] + +.. + _[EXERCISE5-BEGIN] + + +5. Cscope spelunking +-------------------- + +Use LXR or cscope in the :file:`~/src/linux/` directory to discover +the location of certain structures or functions. + +Cscope index files are already generated. Use :command:`vim` and other related commands +to scroll through the source code. For example, use the command: + +:: + + vim + +for opening the :command:`vim` editor. Afterwards, inside the editor, use commands such as: + +:command:`:cs find g task\_struct`. + +Find the file in which the following data types are defined: + +- ``struct task_struct`` + +- ``struct semaphore`` + +- ``struct list_head`` + +- ``spinlock_t`` + +- ``struct file_system_type`` + +.. hint:: For a certain structure, only its name needs to be searched. + + For instance, in the case of :command:`struct task_struct`, + search for the :command:`task_struct` string. + +Usually, you will get more matches. To locate the one you are interested in, do the following: + +#. List all matches by using, in :command:`vim`, :command:`:copen` command. + +#. Look for the right match (where the structure is defined) by looking for an open brace character + (:command:`{`), a single character on the structure definition line. To search for the open + brace you use in :command:`vim` the construction :command:`/{`. + +#. On the respective line, press :command:`Enter` to get into the source code where the variable + is defined. + +#. Close the secondary window using the :command:`:cclose` command.
+ +Find the file in which the following global kernel variables are declared: + +- ``sys_call_table`` + +- ``file_systems`` + +- ``current`` + +- ``chrdevs`` + +.. hint:: To do this, use a :command:`vim` command with the syntax: + + :command:`:cs f g ` + + where :command:`` is the name of the symbol being searched. + +Find the file in which the following functions are declared: + +- ``copy_from_user`` + +- ``vmalloc`` + +- ``schedule_timeout`` + +- ``add_timer`` + +.. hint:: To do this, use a :command:`vim` command with the syntax: + + :command:`:cs f g ` + + where :command:`` is the name of the symbol being searched. + +Scroll through the following sequence of structures: + +- ``struct task_struct`` + +- ``struct mm_struct`` + +- ``struct vm_area_struct`` + +- ``struct vm_operations_struct`` + +That is, you access a structure and then you find fields with the data type of the +next structure, access the respective fields and so on. +Note in which files these structures are defined; this will be useful to the following labs. + + +.. hint:: In order to search for a symbol in :command:`vim` (with :command:`cscope` support) + when the cursor is placed on it, use the :command:`Ctrl+]` keyboard shortcut. + + To return to the previous match (the one before search/jump), use the + :command:`Ctrl+o` keyboard shortcut. + + To move forward with the search (to return to matches before :command:`Ctrl+o`), + use the :command:`Ctrl+i` keyboard shortcut. + +Following the above instructions, find and go through the function call sequence: + +- ``bio_alloc`` + +- ``bio_alloc_bioset`` + +- ``bvec_alloc`` + +- ``kmem_cache_alloc`` + +- ``slab_alloc`` + +.. note:: Read `cscope <#cscope>`__ or `LXR Cross-Reference <#lxr-cross-reference>`__ sections of the lab. 
diff --git a/Documentation/teaching/labs/kernel_api.rst b/Documentation/teaching/labs/kernel_api.rst new file mode 100644 index 00000000000000..f927e5cf3d9ca3 --- /dev/null +++ b/Documentation/teaching/labs/kernel_api.rst @@ -0,0 +1,857 @@ +========== +Kernel API +========== + +Lab objectives +============== + + * Familiarize yourself with the basic Linux kernel API + * Description of memory allocation mechanisms + * Description of locking mechanisms + +Overview +======== + +Inside the current lab we present a set of concepts and basic functions required +for starting Linux kernel programming. It is important to note that kernel +programming differs greatly from user space programming. The kernel is a +stand-alone entity that can not use libraries in user-space (not even libc). +As a result, the usual user-space functions (printf, malloc, free, open, read, +write, memcpy, strcpy, etc.) can no longer be used. In conclusion, kernel +programming is based on a totally new and independent API that is unrelated to +the user-space API, whether we refer to POSIX or ANSI C (standard C language +library functions). + +Accessing memory +================ + +An important difference in kernel programming is how to access and allocate +memory. Due to the fact that kernel programming is very close to the physical +machine, there are important rules for memory management. First, it works with +several types of memory: + + * Physical memory + * Virtual memory from the kernel address space + * Virtual memory from a process's address space + * Resident memory - we know for sure that the accessed pages are present in + physical memory + +Virtual memory in a process's address space can not be considered resident due +to the virtual memory mechanisms implemented by the operating system: pages may +be swapped or simply may not be present in physical memory as a result of the +demand paging mechanism. The memory in the kernel address space can be resident +or not. 
Both the data and code segments of a module and the kernel stack of a +process are resident. Dynamic memory may or may not be resident, depending on +how it is allocated. + +When working with resident memory, things are simple: memory can be accessed at +any time. But if working with non-resident memory, then it can only be accessed +from certain contexts. Non-resident memory can only be accessed from the +process context. Accessing non-resident memory from the context of an +interrupt has unpredictable results and, therefore, when the operating +system detects such access, it will take drastic measures: blocking or +resetting the system to prevent serious corruption. + +The virtual memory of a process can not be accessed directly from the kernel. +In general, accessing the address space of a process is strongly discouraged, +but there are situations where a device driver needs to do it. The typical case +is where the device driver needs to access a buffer from the user-space. In +this case, the device driver must use special features and not directly access +the buffer. This is necessary to prevent access to invalid memory areas. + +Another difference from user-space programming, relative to memory, is due to +the stack, whose size is fixed and limited. A stack of 4K is used in +Linux, and a stack of 12K is used in Windows. For this reason, the +allocation of large structures on stack or the use of recursive calls should +be avoided. + +Contexts of execution +===================== + +In relation to kernel execution, we distinguish two contexts: process context +and interrupt context. We are in the process context when we run code as a +result of a system call or when we run in the context of a kernel thread. When +we run in a routine to handle an interrupt or a deferrable action, we run in +an interrupt context. + +Some of the kernel API calls can block the current process. Common examples are +using a semaphore or waiting for a condition.
In this case, the process is +put into the ``WAITING`` state and another process is running. An interesting +situation occurs when a function that can cause the current process to be +suspended is called from an interrupt context. In this case, there is no +current process, and therefore the results are unpredictable. Whenever the +operating system detects this condition, it will generate an error condition that +will cause the operating system to shut down. + +Locking +======= + +One of the most important features of kernel programming is parallelism. Linux +supports SMP systems with multiple processors and kernel preemptivity. This +makes kernel programming more difficult because access to global variables must +be synchronized with either spinlock primitives or blocking primitives. Although +it is recommended to use blocking primitives, they can not be used in an +interrupt context, so the only locking solution in the context of an interrupt +is spinlocks. + +Spinlocks are used in order to achieve mutual exclusion. When a spinlock can not get +access to the critical region, it does not suspend the current process, but it +uses the busy-waiting mechanism (waiting in a :c:func:`while` loop for the lock +to be released). +The code that runs in the critical region protected by a spinlock is not allowed +to suspend the current process (it must adhere to the execution conditions in +the interrupt context). Moreover, the CPU will not be released except for +the case of an interrupt. Due to the mechanism used, it is important that a +spinlock is held for as little time as possible. + +Preemptivity +============ + +Linux uses preemptive kernels. The notion of preemptive multitasking should not +be confused with the notion of a preemptive kernel. The notion of preemptive +multitasking refers to the fact that the operating system forcefully interrupts +a process running in user space when its quantum (time slice) expires, in order +to run another process.
+A kernel is preemptive if a process running in kernel mode (as a result of a +system call) can be interrupted so that another process can run. + +Because of preemptivity, when we share resources between two portions of code +that can run from different process contexts, we need to protect ourselves with +synchronization primitives, even in the case of a single processor. + +Linux Kernel API +================ + +Convention indicating errors +---------------------------- + +For Linux kernel programming, the convention used for calling functions to +indicate success is the same as in UNIX programming: 0 for success, or a value +other than 0 for failure. +For failures, negative values are returned as shown in the example below: + +.. code-block:: c + + if (alloc_memory() != 0) + return -ENOMEM; + + if (user_parameter_valid() != 0) + return -EINVAL; + +The exhaustive list of errors and a summary explanation can be found in +:file:`include/uapi/asm-generic/errno-base.h` and in +:file:`include/uapi/asm-generic/errno.h`. + +Strings of characters +--------------------- + +In Linux, the kernel programmer is provided with the usual routine functions: +:c:func:`strcpy`, :c:func:`strncpy`, :c:func:`strlcpy`, :c:func:`strcat`, +:c:func:`strncat`, :c:func:`strlcat`, :c:func:`strcmp`, :c:func:`strncmp`, +:c:func:`strnicmp`, :c:func:`strchr`, :c:func:`strnchr`, :c:func:`strrchr`, +:c:func:`strstr`, :c:func:`strlen`, :c:func:`memset`, :c:func:`memmove`, +:c:func:`memcmp`, etc. These functions are declared in the +:file:`include/linux/string.h` header and are implemented in the kernel in the +:file:`lib/string.c` file. + +printk +------ + +The printf equivalent in the kernel is printk, defined in +:file:`include/linux/printk.h`. The :c:func:`printk` syntax is very similar +to :c:func:`printf`. The first +parameter of :c:func:`printk` decides the log category in which the current log +falls into: + +..
code-block:: c + + #define KERN_EMERG "<0>" /* system is unusable */ + #define KERN_ALERT "<1>" /* action must be taken immediately */ + #define KERN_CRIT "<2>" /* critical conditions */ + #define KERN_ERR "<3>" /* error conditions */ + #define KERN_WARNING "<4>" /* warning conditions */ + #define KERN_NOTICE "<5>" /* normal but significant condition */ + #define KERN_INFO "<6>" /* informational */ + #define KERN_DEBUG "<7>" /* debug-level messages */ + +Thus, a warning message in the kernel would be sent with: + +.. code-block:: c + + printk(KERN_WARNING "my_module input string %s\n", buff); + + +If the logging level is missing from the :c:func:`printk` call, logging is done +with the default level at the time of the call. One thing to keep in mind is +that messages sent with :c:func:`printk` are only visible on the console if and +only if their level exceeds the default level set on the console. + +To reduce the size of lines when using :c:func:`printk`, it is recommended to +use the following help functions instead of directly using the :c:func:`printk` +call: + +.. code-block:: c + + pr_emerg(fmt, ...); /* similar to printk(KERN_EMERG pr_fmt(fmt), ...); */ + pr_alert(fmt, ...); /* similar to printk(KERN_ALERT pr_fmt(fmt), ...); */ + pr_crit(fmt, ...); /* similar to printk(KERN_CRIT pr_fmt(fmt), ...); */ + pr_err(fmt, ...); /* similar to printk(KERN_ERR pr_fmt(fmt), ...); */ + pr_warn(fmt, ...); /* similar to printk(KERN_WARNING pr_fmt(fmt), ...); */ + pr_notice(fmt, ...); /* similar to printk(KERN_NOTICE pr_fmt(fmt), ...); */ + pr_info(fmt, ...); /* similar to printk(KERN_INFO pr_fmt(fmt), ...); */ + pr_debug(fmt, ...); /* similar to printk(KERN_DEBUG pr_fmt(fmt), ...); */ + +A special case is :c:func:`pr_debug` that calls the :c:func:`printk` function +only when the :c:macro:`DEBUG` macro is defined or if dynamic debugging is used. + + +Memory allocation +----------------- + +In Linux only resident memory can be allocated, using :c:func:`kmalloc` call. 
+A typical :c:func:`kmalloc` call is presented below: + +.. code-block:: c + + #include + + string = kmalloc (string_len + 1, GFP_KERNEL); + if (!string) { + //report error: -ENOMEM; + } + +As you can see, the first parameter indicates the size in bytes of the allocated +area. The function returns a pointer to a memory area that can be directly used +in the kernel, or :c:macro:`NULL` if memory could not be allocated. The second +parameter specifies how allocation should be done and the most commonly used +values for this are: + + * :c:data:`GFP_KERNEL` - using this value may cause the current process to + be suspended. Thus, it can not be used in the interrupt context. + * :c:data:`GFP_ATOMIC` - using this value it ensures that the + :c:func:`kmalloc` function does not suspend the current process. It can be + used anytime. + +The counterpart to the :c:func:`kmalloc` function is :c:func:`kfree`, a function +that receives as argument an area allocated by :c:func:`kmalloc`. This function +does not suspend the current process and can therefore be called from any +context. + +lists +----- + +Because linked lists are often used, the Linux kernel API provides a unified +way of defining and using lists. This involves using a +:c:type:`struct list_head` element in the structure we want to consider as a +list node. The :c:type:`struct list_head` is defined in +:file:`include/linux/list.h` along with all the other functions that manipulate +the lists. The following code shows the definition of +the :c:type:`struct list_head` and the use of an element of this type in another +well-known structure in the Linux kernel: + +.. code-block:: c + + struct list_head { + struct list_head *next, *prev; + }; + + struct task_struct { + ... + struct list_head children; + ... 
+ }; + +The usual routines for working with lists are the following: + + * :c:macro:`LIST_HEAD(name)` is used to declare the sentinel of a list + * :c:func:`INIT_LIST_HEAD(struct list_head *list)` is used to initialize the + sentinel of a list when dynamic allocation is made, by setting the + :c:data:`next` and :c:data:`prev` fields to point to the list head itself. + * :c:func:`list_add(struct list_head *new, struct list_head *head)` adds the + :c:data:`new` element after the :c:data:`head` element. + * :c:func:`list_del(struct list_head *entry)` deletes the item at the + :c:data:`entry` address from the list it belongs to. + * :c:macro:`list_entry(ptr, type, member)` returns the structure with the + type :c:type:`type` that contains the element :c:data:`ptr` from the list, + having the name :c:member:`member` within the structure. + * :c:macro:`list_for_each(pos, head)` iterates over a list using + :c:data:`pos` as a cursor. + * :c:macro:`list_for_each_safe(pos, n, head)` iterates over a list using + :c:data:`pos` as a cursor and :c:data:`n` as a temporary cursor. + This macro must be used when items are deleted from the list while iterating. + +The following code shows how to use these routines: + +.. 
code-block:: c + + #include + #include + + struct pid_list { + pid_t pid; + struct list_head list; + }; + + LIST_HEAD(my_list); + + static int add_pid(pid_t pid) + { + struct pid_list *ple = kmalloc(sizeof *ple, GFP_KERNEL); + + if (!ple) + return -ENOMEM; + + ple->pid = pid; + list_add(&ple->list, &my_list); + + return 0; + } + + static int del_pid(pid_t pid) + { + struct list_head *i, *tmp; + struct pid_list *ple; + + list_for_each_safe(i, tmp, &my_list) { + ple = list_entry(i, struct pid_list, list); + if (ple->pid == pid) { + list_del(i); + kfree(ple); + return 0; + } + } + + return -EINVAL; + } + + static void destroy_list(void) + { + struct list_head *i, *n; + struct pid_list *ple; + + list_for_each_safe(i, n, &my_list) { + ple = list_entry(i, struct pid_list, list); + list_del(i); + kfree(ple); + } + } + +The evolution of the list can be seen in the following figure: + +.. image:: ../res/list_evolution.png + :width: 85% + +You see the stack type behavior introduced by the :c:macro:`list_add` macro, +and the use of a sentinel. + +From the above example, it can be noticed that the way to define and use a list +(double-linked) is generic and, at the same time, it does not introduce an +additional overhead. The :c:type:`struct list_head` is used to maintain the +links between the list elements. It can be noticed that iterating over the list +is also done with this structure, and that retrieving a list element can be done +using :c:macro:`list_entry`. This idea of implementing and using a list is not +new, as it has already been described in The Art of Computer Programming by +Donald Knuth in the 1980s. + +Several kernel list functions and macro definitions are presented and explained +in the :file:`include/linux/list.h` header. + +Spinlock +-------- + +:c:type:`spinlock_t` (defined in :file:`linux/spinlock.h`) is the basic type +that implements the spinlock concept in Linux. 
It describes a spinlock, and the +operations associated with a spinlock are :c:func:`spin_lock_init`, +:c:func:`spin_lock`, :c:func:`spin_unlock`. An example of use is given below: + +.. code-block:: c + + #include + + DEFINE_SPINLOCK(lock1); + spinlock_t lock2; + + spin_lock_init(&lock2); + + spin_lock(&lock1); + /* critical region */ + spin_unlock(&lock1); + + spin_lock(&lock2); + /* critical region */ + spin_unlock(&lock2); + + +In Linux, you can use reader-writer spinlocks, useful for readers-writers +problems. +These types of locks are identified by :c:type:`rwlock_t`, and the functions +that can work on a reader-writer spinlock are +:c:func:`rwlock_init`, +:c:func:`read_lock`, +:c:func:`write_lock`. +An example of use: + + +.. code-block:: c + + #include + + DEFINE_RWLOCK(lock); + + struct pid_list { + pid_t pid; + struct list_head list; + }; + + int have_pid(struct list_head *lh, int pid) + { + struct list_head *i; + void *elem; + + read_lock(&lock); + list_for_each(i, lh) { + struct pid_list *pl = list_entry(i, struct pid_list, list); + if (pl->pid == pid) { + read_unlock(&lock); + return 1; + } + } + read_unlock(&lock); + + return 0; + } + + void add_pid(struct list_head *lh, struct pid_list *pl) + { + write_lock(&lock); + list_add(&pl->list, lh); + write_unlock(&lock); + } + +mutex +----- + +A mutex is a variable of the :c:type:`struct mutex` type (defined in +:file:`linux/mutex.h`). +Functions and macros for working with mutexes are listed below: + +.. code-block:: c + + #include + + /* functions for mutex initialization */ + void mutex_init(struct mutex *mutex); + DEFINE_MUTEX(name); + + /* functions for mutex acquire */ + void mutex_lock(struct mutex *mutex); + + /* functions for mutex release */ + void mutex_unlock(struct mutex *mutex); + +Operations are similar to classic mutex operations in user-space or spinlock +operations: the mutex is acquired before entering the critical region and it is +released after exiting the critical region. 
Unlike spinlocks, these operations +can only be used in process context. + +.. _atomic-variables: + +Atomic variables +---------------- + +Often, you only need to synchronize access to a simple variable, such as a +counter. For this, an :c:type:`atomic_t` type can be used (defined in +:file:`include/linux/atomic.h`), that holds an integer value. Below are some +operations that can be performed on an :c:type:`atomic_t` variable. + +.. code-block:: c + + #include + + void atomic_set(atomic_t *v, int i); + int atomic_read(atomic_t *v); + void atomic_add(int i, atomic_t *v); + void atomic_sub(int i, atomic_t *v); + void atomic_inc(atomic_t *v); + void atomic_dec(atomic_t *v); + int atomic_inc_and_test(atomic_t *v); + int atomic_dec_and_test(atomic_t *v); + int atomic_cmpxchg(atomic_t *v, int old, int new); + +Use of atomic variables +*********************** + +A common way of using atomic variables is to store the status of an action +(e.g. a flag). So we can use an atomic variable to mark exclusive actions. For +example, we consider that an atomic variable can have the LOCKED and UNLOCKED +values, and if the respective variable equals LOCKED then a specific function +should return -EBUSY. +Such an usage is shown schematically in the code below: + +.. code-block:: c + + #define LOCKED 0 + #define UNLOCKED 1 + + static atomic_t flag; + + static int my_acquire(void) + { + int initial_flag; + + /* + * Check if flag is UNLOCKED; if so, lock it and do it atomically. + * + * This is the atomic equivalent of + * if (flag == UNLOCKED) + * flag = LOCKED; + * else + * return -EBUSY; + */ + initial_flag = atomic_cmpxchg(&flag, UNLOCKED, LOCKED); + if (initial_flag == LOCKED) { + printk(KERN_ALERT "Already locked.\n"); + return -EBUSY; + } + + /* Do your thing after getting the lock. */ + [...] + } + + static void my_release(void) + { + /* Release flag; mark it as unlocked. */ + atomic_set(&flag, UNLOCKED); + } + + void my_init(void) + { + [...] 
+ /* Atomic variable is initially unlocked. */ + atomic_set(&flag, UNLOCKED); + + [...] + } + + +The above code is the equivalent of using a trylock (such as +:c:func:`pthread_mutex_trylock`). + +We can also use a variable to store the size of a buffer and for atomic +updates of the respective variable. The code below is such an example: + +.. code-block:: c + + static unsigned char buffer[MAX_SIZE]; + static atomic_t size; + + static void add_to_buffer(unsigned char value) + { + buffer[atomic_read(&size)] = value; + atomic_inc(&size); + } + + static unsigned char remove_from_buffer(void) + { + unsigned char value; + + value = buffer[atomic_read(&size)]; + atomic_dec(&size); + + return value + } + + static void reset_buffer(void) + { + atomic_set(&size, 0); + } + + void my_init(void) + { + [...] + /* Initialized buffer and size. */ + atomic_set(&size, 0); + memset(buffer, 0, sizeof(buffer)); + + [...] + } + +Atomic bitwise operations +------------------------- + +The kernel provides a set of functions (in :file:`asm/bitops.h`) that modify or +test bits in an atomic way. + +.. code-block:: c + + #include + + void set_bit(int nr, void *addr); + void clear_bit(int nr, void *addr); + void change_bit(int nr, void *addr); + int test_and_set_bit(int nr, void *addr); + int test_and_clear_bit(int nr, void *addr); + int test_and_change_bit(int nr, void *addr); + +:c:data:`Addr` represents the address of the memory area whose bits are being +modified or tested and :c:data:`nr` is the bit on which the operation is +performed. + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: kernel_api + +0. Intro +-------- + +Using |LXR|_ find the definitions of the following symbols in the Linux kernel: + + * :c:type:`struct list_head` + * :c:func:`INIT_LIST_HEAD` + * :c:func:`list_add` + * :c:macro:`list_for_each` + * :c:macro:`list_entry` + * :c:macro:`container_of` + * :c:macro:`offsetof` + +1. 
Memory allocation in Linux kernel +------------------------------------ + +Generate the skeleton for the task named **1-mem** and browse the +contents of the :file:`mem.c` file. Observe the use of the :c:func:`kmalloc` +call for memory allocation. + + 1. Compile the source code and load the :file:`mem.ko` module using + :command:`insmod`. + 2. View the kernel messages using the :command:`dmesg` command. + 3. Unload the kernel module using the :command:`rmmod mem` command. + +.. note:: Review the `Memory Allocation`_ section in the lab. + +2. Sleeping in atomic context +----------------------------- + +Generate the skeleton for the task named **2-sched-spin** and browse +the contents of the :file:`sched-spin.c` file. + + 1. Compile the source code and load the module, according to the above info: + (:command:`make build` and :command:`make copy`) + 2. Notice that the insertion command waits for 5 seconds until it + completes. + 3. Unload the kernel module. + 4. Look for the lines marked with: ``TODO 0`` to create an atomic + section. Re-compile the source code and reload the module into + the kernel. + +You should now get an error. Look at the stack trace. What is the +cause of the error? + +.. hint:: In the error message, follow the line containing the :c:macro:`BUG` + for a description of the error. You are not allowed to sleep in + atomic context. The atomic context is given by a section + between a lock operation and an unlock on a spinlock. + +.. note:: The + :c:func:`schedule_timeout` function, combined with the + :c:macro:`set_current_state` macro, forces the current process to wait + for 5 seconds. + +.. note:: Review the `Contexts of execution`_, `Locking`_ and `Spinlock`_ + sections. + +3. Working with kernel memory +----------------------------- + +Generate the skeleton for the task named **3-memory** and +browse the contents of the :file:`memory.c` file. Notice the comments +marked with ``TODO``.
You must allocate 4 structures of type :c:type:`struct +task_info` and initialize them (in :c:func:`memory_init`), then print and +free them (in :c:func:`memory_exit`). + + 1. (TODO 1) Allocate memory for a :c:type:`struct task_info` structure and + initialize its fields: + + * The :c:member:`pid` field to the PID passed as a parameter; + * The :c:member:`timestamp` field to the value of the :c:data:`jiffies` + variable, which holds the number of ticks that have occurred since the + system booted. + + 2. (TODO 2) Allocate :c:type:`struct task_info` for the current process, + the parent process, the next process, the next process of the next + process, with the following information: + + * PID of the current process, which can be retrieved from the + :c:type:`struct task_struct` structure, returned by the :c:macro:`current` + macro. + + .. hint:: + Search for :c:type:`pid` in :c:type:`task_struct`. + + * PID of the parent process of the current process. + + .. hint:: + Search for the relevant field from :c:type:`struct task_struct` + structure. Look for "parent". + + * PID of the next process from the list of processes, relative to the + current process. + + .. hint:: + Use :c:macro:`next_task` macro, which returns a pointer to the next + process (i.e. a :c:type:`struct task_struct` structure). + + * PID of the next process of the next process, relative to the current + process. + + .. hint:: + Call the :c:macro:`next_task` macro 2 times. + + 3. (TODO 3) Display the four structures. + + * Use :c:func:`printk` to display their two fields: + :c:member:`pid` and :c:member:`timestamp`. + + 4. (TODO 4) Release the memory occupied by the structures + (use :c:func:`kfree`). + +.. hint:: + * You can access the current process using the :c:macro:`current` + macro. + * Look for the relevant fields in the :c:type:`struct task_struct` + structure (:c:member:`pid`, :c:member:`parent`). + * Use the :c:macro:`next_task` macro. The macro returns the pointer to + the next process (i.e.
a :c:type:`struct task_struct*` structure). + +.. note:: The :c:type:`struct task_struct` structure contains two fields to + designate the parent of a task: + + * :c:member:`real_parent` points to the process that created the + task or to process with pid 1 (init) if the parent + completed its execution. + * :c:member:`parent` indicates to the current task parent (the + process that will be reported if the task completes + execution). + + In general, the values of the two fields are the same, but + there are situations where they differ, for example when + using the :c:func:`ptrace` system call. + +.. hint:: Review the `Memory allocation`_ section in the lab. + + +4. Working with kernel lists +---------------------------- + +Generate the skeleton for the task named **4-list**. Browse the +contents of the :file:`list.c` file and notice the comments marked with +``TODO``. The current process will add the four structures from the +previous exercise into a list. The list will be built in the +:c:func:`task_info_add_for_current` function which is called when module is +loaded. The list will be printed and deleted in the :c:func:`list_exit` +function and the :c:func:`task_info_purge_list` function. + + 1. (TODO 1) Complete the :c:func:`task_info_add_to_list` function to allocate + a :c:type:`struct task_info` structure and add it to the list. + + 2. (TODO 2) Complete the :c:func:`task_info_purge_list` function to delete + all the elements in the list. + + 3. Compile the kernel module. Load and unload the module by + following the messages displayed by the kernel. + +.. hint:: Review the labs `Lists`_ section. When deleting items from + the list, you will need to use either the + :c:macro:`list_for_each_safe` or :c:macro:`list_for_each_entry_safe` + macros. + +5. Working with kernel lists for process handling +------------------------------------------------- + +Generate the skeleton for the task named **5-list-full**. 
Browse the +contents of the :file:`list-full.c` and notice comments marked with +``TODO``. In addition to the :file:`4-list` functionality we add the +following: + + * A :c:member:`count` field showing how many times a process has been "added" + to the list. + * If a process is "added" several times, no new entry is created in + the list, but: + + * Update the :c:member:`timestamp` field. + * Increment :c:member:`count`. + + * To implement the counter facility, add a :c:func:`task_info_find_pid` + function that searches for a pid in the existing list. + + * If found, return the reference to the :c:type:`task_info` struct. If + not, return :c:macro:`NULL`. + + * An expiration facility. If a process was added more than 3 + seconds ago and if it does not have a :c:member:`count` greater than 5 then + it is considered expired and is removed from the list. + * The expiration facility is already implemented in the + :c:func:`task_info_remove_expired` function. + + 1. (TODO 1) Implement the :c:func:`task_info_find_pid` function. + 2. (TODO 2) Change a field of an item in the list so it does not + expire. It must not satisfy a part of the expiration condition + from :c:func:`task_info_remove_expired`. + + .. hint:: For ``TODO 2``, extract the first element from the list (the one + referred by :c:member:`head.next`) and set the :c:member:`count` + field to a large enough value. Use :c:func:`atomic_set` function. + + 3. Compile, copy, load and unload the kernel module following the displayed + messages. + Kernel module loading will take some time, because :c:func:`sleep` is + being called by :c:func:`schedule_timeout` function. + +6. Synchronizing list work +-------------------------- + +Generate the skeleton for the task named **6-list-sync**. + + 1. Browse the code and look for ``TODO 1`` string. + 2. Use a spinlock or a read-write lock to synchronize access to the + list. + 3. Compile, load and unload the kernel module. + +.. important:: Always lock data, not code! + +.. 
note:: Read `Spinlock`_ section of the lab. + +7. Test module calling in our list module +----------------------------------------- + +Generate the skeleton for the task named **7-list-test** and browse +the contents of the :file:`list-test.c` file. We'll use it as a test +module. It will call functions exported by the **6-list-sync** +task. The exported functions are the ones marked with **extern** in +:file:`list-test.c` file. + +Uncomment the commented code from :file:`7-list-test.c`. Look for ``TODO 1``. + +To export the above functions from the module located at :file:`6-list-sync/` +directory, the following steps are required: + + 1. Functions must not be static. + 2. Use the :c:macro:`EXPORT_SYMBOL` macro to export the kernel symbols. For + example: :c:macro:`EXPORT_SYMBOL(task_info_remove_expired);`. The + macro must be used for each function after the function is defined. + Browse the code and look for the ``TODO 2`` string in the + :file:`list-sync.c`. + 3. Remove from the module from **6-list-sync** the code that avoids the + expiration of a list item (it is in contradiction to our exercise). + 4. Compile and load the module from :file:`6-list-sync/`. Once loaded, it + exposes exported functions and can be used by the test + module. You can check this by searching for the function names + in :file:`/proc/kallsyms` before and after loading the module. + 5. Compile the test module and then load it. + 6. Use :command:`lsmod` to check that the two modules have been loaded. + What do you notice? + 7. Unload the kernel test module. + +What should be the unload order of the two modules (the module from +**6-list-sync** and the test module)? What happens if you use another order? 
diff --git a/Documentation/teaching/labs/kernel_modules.rst b/Documentation/teaching/labs/kernel_modules.rst new file mode 100644 index 00000000000000..6b1ca31fd02109 --- /dev/null +++ b/Documentation/teaching/labs/kernel_modules.rst @@ -0,0 +1,1345 @@ +============== +Kernel modules +============== + +Lab objectives +============== + +* creating simple modules +* describing the process of kernel module compilation +* presenting how a module can be used with a kernel +* simple kernel debugging methods + +.. + _[SECTION-OVERVIEW-BEGIN] + +Kernel Modules Overview +======================= + +A monolithic kernel, though faster than a microkernel, has the disadvantage of +lack of modularity and extensibility. On modern monolithic kernels, this has +been solved by using kernel modules. A kernel module (or loadable kernel module) +is an object file that contains code that can extend the kernel functionality +at runtime (it is loaded as needed). When a kernel module is no longer needed, +it can be unloaded. Most of the device drivers are used in the form of kernel +modules. + +For the development of Linux device drivers, it is recommended to download the +kernel sources, configure and compile them and then install the compiled version +on the test/development machine. + +.. + _[SECTION-OVERVIEW-END] + +.. + _[SECTION-MODULE-EXAMPLE-BEGIN] + +An example of a kernel module +============================= + +Below is a very simple example of a kernel module. When loaded into the kernel, +it will generate the message :code:`"Hi"`. When unloading the kernel module, the +:code:`"Bye"` message will be generated. + +.. 
code-block:: c + + #include + #include + #include + + MODULE_DESCRIPTION("My kernel module"); + MODULE_AUTHOR("Me"); + MODULE_LICENSE("GPL"); + + static int dummy_init(void) + { + pr_debug("Hi\n"); + return 0; + } + + static void dummy_exit(void) + { + pr_debug("Bye\n"); + } + + module_init(dummy_init); + module_exit(dummy_exit); + + +The generated messages will not be displayed on the console but will be saved +in a specially reserved memory area for this, from where they will be extracted +by the logging daemon (syslog). To display kernel messages, you can use the +:command:`dmesg` command or inspect the logs: + +.. code-block:: bash + + # cat /var/log/syslog | tail -2 + Feb 20 13:57:38 asgard kernel: Hi + Feb 20 13:57:43 asgard kernel: Bye + + # dmesg | tail -2 + Hi + Bye + +.. + _[SECTION-MODULE-EXAMPLE-END] + +.. + _[SECTION-COMPILE-MODULES-BEGIN] + +Compiling kernel modules +======================== + +Compiling a kernel module differs from compiling an user program. First, other +headers should be used. Also, the module should not be linked to libraries. +And, last but not least, the module must be compiled with the same options as +the kernel in which we load the module. For these reasons, there is a standard +compilation method (:code:`kbuild`). This method requires the use of two files: +a :file:`Makefile` and a :file:`Kbuild` file. + +Below is an example of a :file:`Makefile`: + +.. code-block:: bash + + KDIR = /lib/modules/`uname -r`/build + + kbuild: + make -C $(KDIR) M=`pwd` + + clean: + make -C $(KDIR) M=`pwd` clean + +And the example of a :file:`Kbuild` file used to compile a module: + +.. code-block:: bash + + EXTRA_CFLAGS = -Wall -g + + obj-m = modul.o + + +As you can see, calling :command:`make` on the :file:`Makefile` file in the +example shown will result in the :command:`make` invocation in the kernel +source directory (``/lib/modules/`uname -r`/build``) and referring to the +current directory (``M = `pwd```). 
This process ultimately leads to reading +the :file:`Kbuild` file from the current directory and compiling the module +as instructed in this file. + +.. note:: For labs we will configure different :command:`KDIR`, according to + the virtual machine specifications: + + .. code-block:: bash + + KDIR = /home/student/src/linux + [...] + +A :file:`Kbuild` file contains one or more directives for compiling a kernel +module. The easiest example of such a directive is ``obj-m = +module.o``. Following this directive, a kernel module (:code:`ko` - kernel +object) will be created, starting from the ``module.o`` file. ``module.o`` will +be created starting from ``module.c`` or ``module.S``. All of these files can +be found in the :file:`Kbuild`'s directory. + +An example of a :file:`Kbuild` file that uses several sub-modules is shown +below: + +.. code-block:: bash + + EXTRA_CFLAGS = -Wall -g + + obj-m = supermodule.o + supermodule-y = module-a.o module-b.o + +For the example above, the steps to compile are: + + * compile the :file:`module-a.c` and :file:`module-b.c` sources, + resulting in module-a.o and module-b.o objects + * :file:`module-a.o` and :file:`module-b.o` will then be linked + in :file:`supermodule.o` + * from :file:`supermodule.o` will be created :file:`supermodule.ko` + module + + +The suffix of targets in :file:`Kbuild` determines how they are used, as +follows: + + * M (modules) is a target for loadable kernel modules + + * Y (yes) represents a target for object files to be compiled and then + linked to a module (``$(mode_name)-y``) or within the kernel (``obj-y``) + + * any other target suffix will be ignored by :file:`Kbuild` and will not be + compiled + + +.. note:: These suffixes are used to easily configure the kernel by running the + :command:`make menuconfig` command or directly editing the + :file:`.config` file. This file sets a series of variables that are + used to determine which features are added to the kernel at build + time. 
For example, when adding BTRFS support with :command:`make + menuconfig`, add the line :code:`CONFIG_BTRFS_FS = y` to the + :file:`.config` file. The BTRFS kbuild contains the line + ``obj-$(CONFIG_BTRFS_FS):= btrfs.o``, which becomes ``obj-y:= + btrfs.o``. As a result, the :file:`btrfs.o` object will be compiled and + linked into the kernel. Before the variable was set, the line became + ``obj:=btrfs.o`` and so it was ignored, and the kernel was built + without BTRFS support. + +For more details, see the :file:`Documentation/kbuild/makefiles.txt` and +:file:`Documentation/kbuild/modules.txt` files within the kernel sources. + +.. + _[SECTION-COMPILE-MODULES-END] + +.. + _[SECTION-LOAD-MODULES-BEGIN] + +Loading/unloading a kernel module +================================= + +To load a kernel module, use the :command:`insmod` utility. This utility +receives as a parameter the path to the :file:`*.ko` file in which the module +was compiled and linked. Unloading the module from the kernel is done using +the :command:`rmmod` command, which receives the module name as a parameter. + +.. code-block:: bash + + $ insmod module.ko + $ rmmod module.ko + +When loading the kernel module, the routine specified as a parameter of the +``module_init`` macro will be executed. Similarly, when the module is unloaded +the routine specified as a parameter of the ``module_exit`` macro will be executed. + +A complete example of compiling and loading/unloading a kernel module is +presented below: + +.. code-block:: bash + + faust:~/lab-01/modul-lin# ls + Kbuild Makefile modul.c + + faust:~/lab-01/modul-lin# make + make -C /lib/modules/`uname -r`/build M=`pwd` + make[1]: Entering directory `/usr/src/linux-2.6.28.4' + LD /root/lab-01/modul-lin/built-in.o + CC [M] /root/lab-01/modul-lin/modul.o + Building modules, stage 2.
+ MODPOST 1 modules + CC /root/lab-01/modul-lin/modul.mod.o + LD [M] /root/lab-01/modul-lin/modul.ko + make[1]: Leaving directory `/usr/src/linux-2.6.28.4' + + faust:~/lab-01/modul-lin# ls + built-in.o Kbuild Makefile modul.c Module.markers + modules.order Module.symvers modul.ko modul.mod.c + modul.mod.o modul.o + + faust:~/lab-01/modul-lin# insmod modul.ko + + faust:~/lab-01/modul-lin# dmesg | tail -1 + Hi + + faust:~/lab-01/modul-lin# rmmod modul + + faust:~/lab-01/modul-lin# dmesg | tail -2 + Hi + Bye + +Information about modules loaded into the kernel can be found using the +:command:`lsmod` command or by inspecting the :file:`/proc/modules`, +:file:`/sys/module` directories. + +.. + _[SECTION-LOAD-MODULES-END] + +.. + _[SECTION-DEBUG-MODULES-BEGIN] + +Kernel Module Debugging +======================= + +Troubleshooting a kernel module is much more complicated than debugging a +regular program. First, a mistake in a kernel module can lead to blocking the +entire system. Troubleshooting is therefore much slowed down. To avoid reboot, +it is recommended to use a virtual machine (qemu, virtualbox, vmware). + +When a module containing bugs is inserted into the kernel, it will eventually +generate a `kernel oops `_. +A kernel oops is an invalid operation detected by the kernel and can only +be generated by the kernel. For a stable kernel version, it almost certainly +means that the module contains a bug. After the oops appears, the kernel will +continue to work. + +When a kernel oops appears, it is very important to save the generated +message. As noted above, messages generated by the kernel are saved in logs and +can be displayed with the :command:`dmesg` command. To make sure that no kernel +message is lost, it is recommended to insert/test the kernel module directly from the +console, or periodically check the kernel messages. Noteworthy is that an oops +can occur because of a programming error, but also because of a hardware error.
+ +If a fatal error occurs, after which the system can not return to a stable +state, a `kernel panic `_ is +generated. + +Look at the kernel module below that contains a bug that generates an oops: + +.. code-block:: c + + /* + * Oops generating kernel module + */ + + #include + #include + #include + + MODULE_DESCRIPTION ("Oops"); + MODULE_LICENSE ("GPL"); + MODULE_AUTHOR ("PSO"); + + #define OP_READ 0 + #define OP_WRITE 1 + #define OP_OOPS OP_WRITE + + static int my_oops_init (void) + { + int *a; + + a = (int *) 0x00001234; + #if OP_OOPS == OP_WRITE + *a = 3; + #elif OP_OOPS == OP_READ + printk (KERN_ALERT "value = %d\n", *a); + #else + #error "Unknown op for oops!" + #endif + + return 0; + } + + static void my_oops_exit (void) + { + } + + module_init (my_oops_init); + module_exit (my_oops_exit); + +.. ** + +Inserting this module into the kernel will generate an oops: + +.. code-block:: bash + + faust:~/lab-01/modul-oops# insmod oops.ko + [...] + + faust:~/lab-01/modul-oops# dmesg | tail -32 + BUG: unable to handle kernel paging request at 00001234 + IP: [] my_oops_init+0x5/0x20 [oops] + *de = 00000000 + Oops: 0002 [#1] PREEMPT DEBUG_PAGEALLOC + last sysfs file: /sys/devices/virtual/net/lo/operstate + Modules linked in: oops(+) netconsole ide_cd_mod pcnet32 crc32 cdrom [last unloaded: modul] + + Pid: 4157, comm: insmod Not tainted (2.6.28.4 #2) VMware Virtual Platform + EIP: 0060:[] EFLAGS: 00010246 CPU: 0 + EIP is at my_oops_init+0x5/0x20 [oops] + EAX: 00000000 EBX: fffffffc ECX: c89d4300 EDX: 00000001 + ESI: c89d4000 EDI: 00000000 EBP: c5799e24 ESP: c5799e24 + DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 + Process insmod (pid: 4157, ti=c5799000 task=c665c780 task.ti=c5799000) + Stack: + c5799f8c c010102d c72b51d8 0000000c c5799e58 c01708e4 00000124 00000000 + c89d4300 c5799e58 c724f448 00000001 c89d4300 c5799e60 c0170981 c5799f8c + c014b698 00000000 00000000 c5799f78 c5799f20 00000500 c665cb00 c89d4300 + Call Trace: + [] ? _stext+0x2d/0x170 + [] ? 
__vunmap+0xa4/0xf0 + [] ? vfree+0x21/0x30 + [] ? load_module+0x19b8/0x1a40 + [] ? __mutex_unlock_slowpath+0xd5/0x140 + [] ? trace_hardirqs_on_caller+0x106/0x150 + [] ? sys_init_module+0x8a/0x1b0 + [] ? trace_hardirqs_on_caller+0x106/0x150 + [] ? trace_hardirqs_on_thunk+0xc/0x10 + [] ? sysenter_do_call+0x12/0x43 + Code: 05 34 12 00 00 03 00 00 00 5d c3 eb 0d 90 90 90 90 90 90 90 90 + EIP: [] my_oops_init+0x5/0x20 [oops] SS:ESP 0068:c5799e24 + ---[ end trace 2981ce73ae801363 ]--- + +Although relatively cryptic, the message provided by the kernel to the +appearance of an oops provides valuable information about the error. First line: + +.. code-block:: bash + + BUG: unable to handle kernel paging request at 00001234 + EIP: [] my_oops_init + 0x5 / 0x20 [oops] + +Tells us the cause and the address of the instruction that generated the error. +In our case this is an invalid access to memory. + +Next line + + ``Oops: 0002 [# 1] PREEMPT DEBUG_PAGEALLOC`` + +Tells us that it's the first oops (#1). This is important in the context that +an oops can lead to other oopses. Usually only the first oops is relevant. +Furthermore, the oops code (``0002``) provides information about the error type +(see :file:`arch/x86/include/asm/trap_pf.h`): + + + * Bit 0 == 0 means no page found, 1 means protection fault + * Bit 1 == 0 means read, 1 means write + * Bit 2 == 0 means kernel, 1 means user mode + +In this case, we have a write access that generated the oops (bit 1 is 1). + +Below is a dump of the registers. It decodes the instruction pointer (``EIP``) +value and notes that the bug appeared in the :code:`my_oops_init` function with +a 5-byte offset (``EIP: [] my_oops_init+0x5``). The message also +shows the stack content and a backtrace of calls until then. + +If an invalid read call is generated (``#define OP_OOPS OP_READ``), the message +will be the same, but the oops code will differ, which would now be ``0000``: + +.. 
code-block:: bash + + faust:~/lab-01/modul-oops# dmesg | tail -33 + BUG: unable to handle kernel paging request at 00001234 + IP: [] my_oops_init+0x6/0x20 [oops] + *de = 00000000 + Oops: 0000 [#1] PREEMPT DEBUG_PAGEALLOC + last sysfs file: /sys/devices/virtual/net/lo/operstate + Modules linked in: oops(+) netconsole pcnet32 crc32 ide_cd_mod cdrom + + Pid: 2754, comm: insmod Not tainted (2.6.28.4 #2) VMware Virtual Platform + EIP: 0060:[] EFLAGS: 00010292 CPU: 0 + EIP is at my_oops_init+0x6/0x20 [oops] + EAX: 00000000 EBX: fffffffc ECX: c89c3380 EDX: 00000001 + ESI: c89c3010 EDI: 00000000 EBP: c57cbe24 ESP: c57cbe1c + DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 + Process insmod (pid: 2754, ti=c57cb000 task=c66ec780 task.ti=c57cb000) + Stack: + c57cbe34 00000282 c57cbf8c c010102d c57b9280 0000000c c57cbe58 c01708e4 + 00000124 00000000 c89c3380 c57cbe58 c5db1d38 00000001 c89c3380 c57cbe60 + c0170981 c57cbf8c c014b698 00000000 00000000 c57cbf78 c57cbf20 00000580 + Call Trace: + [] ? _stext+0x2d/0x170 + [] ? __vunmap+0xa4/0xf0 + [] ? vfree+0x21/0x30 + [] ? load_module+0x19b8/0x1a40 + [] ? printk+0x0/0x1a + [] ? __mutex_unlock_slowpath+0xd5/0x140 + [] ? trace_hardirqs_on_caller+0x106/0x150 + [] ? sys_init_module+0x8a/0x1b0 + [] ? trace_hardirqs_on_caller+0x106/0x150 + [] ? trace_hardirqs_on_thunk+0xc/0x10 + [] ? sysenter_do_call+0x12/0x43 + Code: 34 12 00 00 c7 04 24 54 30 9c c8 89 44 24 04 e8 58 a0 99 f7 31 + EIP: [] my_oops_init+0x6/0x20 [oops] SS:ESP 0068:c57cbe1c + ---[ end trace 45eeb3d6ea8ff1ed ]--- + +objdump +------- + +Detailed information about the instruction that generated the oops can be found +using the :command:`objdump` utility. Useful options to use are :command:`-d` +to disassemble the code and :command:`-S` for interleaving C code in assembly +language code. For efficient decoding, however, we need the address where the +kernel module was loaded. This can be found in :file:`/proc/modules`. 
+ +Here's an example of using :command:`objdump` on the above module to identify +the instruction that generated the oops: + +.. code-block:: bash + + faust:~/lab-01/modul-oops# cat /proc/modules + oops 1280 1 - Loading 0xc89d4000 + netconsole 8352 0 - Live 0xc89ad000 + pcnet32 33412 0 - Live 0xc895a000 + ide_cd_mod 34952 0 - Live 0xc8903000 + crc32 4224 1 pcnet32, Live 0xc888a000 + cdrom 34848 1 ide_cd_mod, Live 0xc886d000 + + faust:~/lab-01/modul-oops# objdump -dS --adjust-vma=0xc89d4000 oops.ko + + oops.ko: file format elf32-i386 + + + Disassembly of section .text: + + c89d4000 : + #define OP_READ 0 + #define OP_WRITE 1 + #define OP_OOPS OP_WRITE + + static int my_oops_init (void) + { + c89d4000: 55 push %ebp + #else + #error "Unknown op for oops!" + #endif + + return 0; + } + c89d4001: 31 c0 xor %eax,%eax + #define OP_READ 0 + #define OP_WRITE 1 + #define OP_OOPS OP_WRITE + + static int my_oops_init (void) + { + c89d4003: 89 e5 mov %esp,%ebp + int *a; + + a = (int *) 0x00001234; + #if OP_OOPS == OP_WRITE + *a = 3; + c89d4005: c7 05 34 12 00 00 03 movl $0x3,0x1234 + c89d400c: 00 00 00 + #else + #error "Unknown op for oops!" + #endif + + return 0; + } + c89d400f: 5d pop %ebp + c89d4010: c3 ret + c89d4011: eb 0d jmp c89d4020 + c89d4013: 90 nop + c89d4014: 90 nop + c89d4015: 90 nop + c89d4016: 90 nop + c89d4017: 90 nop + c89d4018: 90 nop + c89d4019: 90 nop + c89d401a: 90 nop + c89d401b: 90 nop + c89d401c: 90 nop + c89d401d: 90 nop + c89d401e: 90 nop + c89d401f: 90 nop + + c89d4020 : + + static void my_oops_exit (void) + { + c89d4020: 55 push %ebp + c89d4021: 89 e5 mov %esp,%ebp + } + c89d4023: 5d pop %ebp + c89d4024: c3 ret + c89d4025: 90 nop + c89d4026: 90 nop + c89d4027: 90 nop + +Note that the instruction that generated the oops (``c89d4005`` identified +earlier) is: + + ``c89d4005: c7 05 34 12 00 00 03 movl $0x3,0x1234`` + +That is exactly what was expected - storing value 3 at 0x00001234. 
+ +The :file:`/proc/modules` file is used to find the address where a kernel module is +loaded. The :command:`--adjust-vma` option allows you to display instructions +relative to ``0xc89d4000``. The :command:`-l` option displays the number of +each line in the source code interleaved with the assembly language code. + +addr2line +--------- + +A simpler way to find the code that generated an oops is to use the +:command:`addr2line` utility: + +.. code-block:: bash + + faust:~/lab-01/modul-oops# addr2line -e oops.o 0x5 + /root/lab-01/modul-oops/oops.c:23 + +Where ``0x5`` is the value of the program counter (``EIP = c89d4005``) that +generated the oops, minus the base address of the module (``0xc89d4000``) +according to :file:`/proc/modules`. + +minicom +------- + +:command:`Minicom` (or other equivalent utilities, e.g. :command:`picocom`, +:command:`screen`) is a utility that can be used to connect and interact with a +serial port. The serial port is the basic method for analyzing kernel messages +or interacting with an embedded system in the development phase. There are two +common ways to connect: + +* a serial port where the device we are going to use is :file:`/dev/ttyS0` + +* a serial USB port (FTDI) in which case the device we are going to use is + :file:`/dev/ttyUSB0`. + +For the virtual machine used in the lab, the device that we need to use is +displayed after the virtual machine starts: + +.. code-block:: bash + + char device redirected to /dev/pts/20 (label virtiocon0) + +Minicom use: + +.. code-block:: bash + + #For connecting via COM1 and using a speed of 115200 baud + minicom -b 115200 -D /dev/ttyS0 + + #For USB serial port connection + minicom -D /dev/ttyUSB0 + + #To connect to the serial port of the virtual machine + minicom -D /dev/pts/20 + +netconsole +---------- + +:command:`Netconsole` is a utility that allows logging of kernel debugging +messages over the network. 
This is useful when the disk logging system does not +work or when serial ports are not available or when the terminal does not +respond to commands. :command:`Netconsole` comes in the form of a kernel +module. + +To work, it needs the following parameters: + + * port, IP address, and the source interface name of the debug station + * port, MAC address, and IP address of the machine to which the debug + messages will be sent + +These parameters can be configured when the module is inserted into the kernel, +or even while the module is inserted if it has been compiled with the +``CONFIG_NETCONSOLE_DYNAMIC`` option. + +An example configuration when inserting :command:`netconsole` kernel module is +as follows: + +.. code-block:: bash + + alice:~# modprobe netconsole netconsole=6666@192.168.191.130/eth0,6000@192.168.191.1/00:50:56:c0:00:08 + +Thus, the debug messages on the station that has the address +``192.168.191.130`` will be sent to the ``eth0`` interface, having source port +``6666``. The messages will be sent to ``192.168.191.1`` with the MAC address +``00:50:56:c0:00:08``, on port ``6000``. + +Messages can be displayed on the destination station using :command:`netcat`: + +.. code-block:: bash + + bob:~ # nc -l -p 6000 -u + +Alternatively, the destination station can configure :command:`syslogd` to +intercept these messages. More information can be found in +:file:`Documentation/networking/netconsole.txt`. + +Printk debugging +---------------- + +``The two oldest and most useful debugging aids are Your Brain and Printf``. + +For debugging, a primitive way is often used, but it is quite effective: +:code:`printk` debugging. Although a debugger can also be used, it is generally +not very useful: simple bugs (uninitialized variables, memory management +problems, etc.) can be easily localized by control messages and the +kernel-decoded oops message. 
+ +For more complex bugs, even a debugger can not help us too much unless the +operating system structure is very well understood. When debugging a kernel +module, there are a lot of unknowns in the equation: multiple contexts (we have +multiple processes and threads running at a time), interrupts, virtual +memory, etc. + +You can use :code:`printk` to display kernel messages to user space. It is +similar to :code:`printf`'s functionality; the only difference is that the +transmitted message can be prefixed with a string of :code:`"<n>"`, where +:code:`n` indicates the error level (loglevel) and has values between ``0`` and +``7``. Instead of :code:`"<n>"`, the levels can also be coded by symbolic +constants: + +.. code-block:: c + + KERN_EMERG - n = 0 + KERN_ALERT - n = 1 + KERN_CRIT - n = 2 + KERN_ERR - n = 3 + KERN_WARNING - n = 4 + KERN_NOTICE - n = 5 + KERN_INFO - n = 6 + KERN_DEBUG - n = 7 + + +The definitions of all log levels are found in :file:`linux/kern_levels.h`. +Basically, these log levels are used by the system to route messages sent to +various outputs: console, log files in :file:`/var/log` etc. + +.. note:: To display :code:`printk` messages in user space, the :code:`printk` + log level must be of higher priority than the :code:`console_loglevel` + variable. The default console log level can be configured from + :file:`/proc/sys/kernel/printk`. + + For instance, the command: + + .. code-block:: bash + + echo 8 > /proc/sys/kernel/printk + + will enable all the kernel log messages to be displayed in the + console. That is, the logging level has to be strictly less than the + :code:`console_loglevel` variable. For example, if the + :code:`console_loglevel` has a value of ``5`` (specific to + :code:`KERN_NOTICE`), only messages with loglevel stricter than ``5`` + (i.e. :code:`KERN_EMERG`, :code:`KERN_ALERT`, :code:`KERN_CRIT`, + :code:`KERN_ERR`, :code:`KERN_WARNING`) will be shown. 
+ +Console-redirected messages can be useful for quickly viewing the effect of +executing the kernel code, but they are no longer so useful if the kernel +encounters an irreparable error and the system freezes. In this case, the logs +of the system must be consulted, as they keep the information between system +restarts. These are found in :file:`/var/log` and are text files, populated by +:code:`syslogd` and :code:`klogd` during the kernel run. :code:`syslogd` and +:code:`klogd` take the information from the virtual file system mounted in +:file:`/proc`. In principle, with :code:`syslogd` and :code:`klogd` turned on, +all messages coming from the kernel will go to :file:`/var/log/kern.log`. + +A simpler version for debugging is using the :file:`/var/log/debug` file. It +is populated only with the :code:`printk` messages from the kernel with the +:code:`KERN_DEBUG` log level. + +Given that a production kernel (similar to the one we're probably running with) +contains only release code, our module is among the few that send messages +prefixed with KERN_DEBUG . In this way, we can easily navigate through the +:file:`/var/log/debug` information by finding the messages corresponding to a +debugging session for our module. + +Such an example would be the following: + +.. code-block:: bash + + # Clear the debug file of previous information (or possibly a backup) + $ echo "New debug session" > /var/log/debug + # Run the tests + # If there is no critical error causing a panic kernel, check the output + # if a critical error occurs and the machine only responds to a restart, + restart the system and check /var/log/debug. + +The format of the messages must obviously contain all the information of +interest in order to detect the error, but inserting in the code :code:`printk` +to provide detailed information can be as time-consuming as writing the code to +solve the problem. 
This is usually a trade-off between the completeness of the +debugging messages displayed using :code:`printk` and the time it takes to +insert these messages into the code. + +A very simple way, less time-consuming for inserting :code:`printk` and +providing the possibility to analyze the flow of instructions for tests is the +use of the predefined constants :code:`__FILE__`, :code:`__LINE__` and +:code:`__func__`: + + * ``__FILE__`` is replaced by the compiler with the name of the source file + that is currently being compiled. + + * ``__LINE__`` is replaced by the compiler with the line number on which the + current instruction is found in the current source file. + + * ``__func__`` /``__FUNCTION__`` is replaced by the compiler with the name + of the function in which the current instruction is found. + +.. note:: + :code:`__FILE__` and :code:`__LINE__` are part of the ANSI C specifications: + :code:`__func__` is part of specification C99; :code:`__FUNCTION__` is a GNU + :code:`C` extension and is not portable; however, since we write code for the + :code:`Linux` kernel, we can use it without any problems. + +The following macro definition can be used in this case: + +.. code-block:: c + + #define PRINT_DEBUG \ + printk (KERN_DEBUG "[%s]: FUNC:%s: LINE:%d\n", __FILE__, \ + __FUNCTION__, __LINE__) + +Then, at each point where we want to see if it is "reached" in execution, +insert PRINT_DEBUG. This is a simple and quick way, and can yield results by carefully +analyzing the output. + +The :command:`dmesg` command is used to view the messages printed with +:code:`printk` but not appearing on the console. + +To delete all previous messages from a log file, run: + +.. code-block:: bash + + cat /dev/null > /var/log/debug + +To delete messages displayed by the :command:`dmesg` command, run: + +.. code-block:: bash + + dmesg -c + + +Dynamic debugging +----------------- + +Dynamic `dyndbg `_ +debugging enables dynamic debugging activation/deactivation. 
+Unlike :code:`printk`, it offers more advanced :code:`printk` options for the +messages we want to display; it is very useful for complex modules or +troubleshooting subsystems. +This significantly reduces the amount of messages displayed, leaving only +those relevant for the debug context. To enable ``dyndbg``, the kernel must be +compiled with the ``CONFIG_DYNAMIC_DEBUG`` option. Once configured, +:code:`pr_debug()`, :code:`dev_dbg()` and :code:`print_hex_dump_debug()`, +:code:`print_hex_dump_bytes()` can be dynamically enabled per call. + +The :file:`/sys/kernel/debug/dynamic_debug/control` file from the debugfs (where +:file:`/sys/kernel/debug` is the path to which debugfs was mounted) is used to +filter messages or to view existing filters. + +.. code-block:: c + + mount -t debugfs none /debug + +`Debugfs `_ +is a simple file system, used as a kernel-space interface and +user-space interface to configure different debug options. Any debug utility +can create and use its own files /folders in debugfs. + +For example, to display existing filters in ``dyndbg``, you will use: + +.. code-block:: bash + + cat /debug/dynamic_debug/control + +And to enable the debug message from line ``1603`` in the :file:`svcsock.c` file: + +.. code-block:: bash + + echo 'file svcsock.c line 1603 +p' > /debug/dynamic_debug/control + +The :file:`/debug/dynamic_debug/control` file is not a regular file. It shows +the ``dyndbg`` settings on the filters. Writing in it with an echo will change +these settings (it will not actually make a write). Be aware that the file +contains settings for ``dyndbg`` debugging messages. Do not log in this file. + +Dyndbg Options +~~~~~~~~~~~~~~ + +* ``func`` - just the debug messages from the functions that have the same + name as the one defined in the filter. + + .. code-block:: bash + + echo 'func svc_tcp_accept +p' > /debug/dynamic_debug/control + +* ``file`` - the name of the file(s) for which we want to display the debug + messages. 
It can be just the source name, but also the absolute path or + kernel-tree path. + + .. code-block:: bash + + file svcsock.c + file kernel/freezer.c + file /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c + +* ``module`` - module name. + + .. code-block:: bash + + module sunrpc + +* ``format`` - only messages whose display format contains the specified string. + + .. code-block:: bash + + format "nfsd: SETATTR" + +* ``line`` - the line or lines for which we want to enable debug calls. + + .. code-block:: bash + + # Triggers debug messages between lines 1603 and 1605 in the svcsock.c file + $ echo 'file svcsock.c line 1603-1605 +p' > /sys/kernel/debug/dynamic_debug/control + # Enables debug messages from the beginning of the file to line 1605 + $ echo 'file svcsock.c line -1605 +p' > /sys/kernel/debug/dynamic_debug/control + +In addition to the above options, a series of flags can be added, removed, or set +with operators ``+``, ``-`` or ``=``: + + * ``p`` activates the pr_debug() call. + * ``f`` includes the name of the function in the printed message. + * ``l`` includes the line number in the printed message. + * ``m`` includes the module name in the printed message. + * ``t`` includes the thread id if it is not called from interrupt context. + * ``_`` no flag is set. + +KDB: Kernel debugger +-------------------- + +The kernel debugger has proven to be very useful to facilitate the development and +debugging process. One of its main advantages is the possibility to perform live debugging. +This allows us to monitor, in real time, the accesses to memory or even modify the memory +while debugging. +The debugger has been integrated in the mainline kernel starting with version 2.6.26-rc1. 
+KDB is not a *source debugger*, but for a complete analysis it can be used in parallel with +gdb and symbol files -- see :ref:`the GDB debugging section ` + +To use KDB, you have the following options: + + * non-usb keyboard + VGA text console + * serial port console + * USB EHCI debug port + +For the lab, we will use a serial interface connected to the host. +The following command will activate GDB over the serial port: + +.. code-block:: bash + + echo hvc0 > /sys/module/kgdboc/parameters/kgdboc + +KDB is a *stop mode debugger*, which means that, while it is active, all the other processes +are stopped. The kernel can be *forced* to enter KDB during execution using the following +`SysRq `__ command + +.. code-block:: bash + + echo g > /proc/sysrq-trigger + +or by using the key combination ``Ctrl+O g`` in a terminal connected to the serial port +(for example using :command:`minicom`). + +KDB has various commands to control and define the context of the debugged system: + + * lsmod, ps, kill, dmesg, env, bt (backtrace) + * dump trace logs + * hardware breakpoints + * modifying memory + +For a better description of the available commands you can use the ``help`` command in +the KDB shell. +In the next example, you can notice a simple KDB usage example which sets a hardware +breakpoint to monitor the changes of the ``mVar`` variable. + +.. code-block:: bash + + # trigger KDB + echo g > /proc/sysrq-trigger + # or if we are connected to the serial port issue + Ctrl-O g + # breakpoint on write access to the mVar variable + kdb> bph mVar dataw + # return from KDB + kdb> go + +.. + _[SECTION-DEBUG-MODULES-END] + +Exercises +========= + +.. _exercises_summary: + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: kernel_modules + +0. Intro +-------- + +Using :command:`cscope` or |LXR|_ find the definitions of the following symbols +in the Linux kernel source code: + +* :c:func:`module_init` and :c:func:`module_exit` + + - what do the two macros do? 
What are ``init_module`` and ``cleanup_module``? + +* :c:data:`ignore_loglevel` + + - What is this variable used for? + +.. warning:: + If you have problems using :command:`cscope`, it is possible that the database + is not generated. To generate it, use the following command in the kernel + directory: + + .. code-block:: bash + + make ARCH=x86 cscope + +.. note:: + When searching for a structure using :command:`cscope`, use only the + structure name (without :code:`struct`). So, to search for the + structure :c:type:`struct module`, you will use the command + + .. code-block:: bash + + vim -t module + + or, in :command:`vim`, the command + + .. code-block:: bash + + :cs f g module + +.. note:: + For more info on using :command:`cscope`, read the + :ref:`cscope section ` in the previous lab. + +.. + _[EXERCISE1-BEGIN] + +1. Kernel module +---------------- + +To work with the kernel modules, we will follow the steps described +:ref:`above `. + +Generate the skeleton for the task named **1-2-test-mod** then build the module, +by running the following commands in :file:`tools/labs`. + +.. code-block:: bash + + $ LABS=kernel_modules make skels + $ make build + +These commands will build all the modules in the current +lab skeleton. + +.. warning:: + Until after solving exercise 3, you will get a compilation error for + ``3-error-mod``. To avoid this issue, remove the directory + :file:`skels/kernel_modules/3-error-mod/` and remove the corresponding + line from ``skels/Kbuild``. + +Start the VM using :command:`make console`, and perform the following tasks: + +* load the kernel module. + +* list the kernel modules and check if current module is present + +* unload the kernel module + +* view the messages displayed at loading/unloading the kernel module using + :command:`dmesg` command + +.. note:: Read `Loading/unloading a kernel module`_ section. When unloading + a kernel module, you can specify only the module name + (without extension). + +.. 
+ _[EXERCISE2-BEGIN] + + +2. Printk +--------- + +Watch the virtual machine console. Why were the messages displayed directly +to the virtual machine console? + +Configure the system such that the messages are not displayed directly +on the serial console, and they can only be inspected using ``dmesg``. + +.. hint:: One option is to set the console log level by writing + the desired level to ``/proc/sys/kernel/printk``. + Use a value smaller than the level used for the prints in + the source code of the module. + +Load/unload the module again. +The messages should not be printed to the virtual machine console, +but they should be visible when running ``dmesg``. + +.. + _[EXERCISE2-END] + +.. + _[EXERCISE3-BEGIN] + +3. Error +-------- + +Generate the skeleton for the task named **3-error-mod**. Compile the +sources and get the corresponding kernel module. + +Why have compilation +errors occurred? **Hint:** How does this module differ from the previous module? + +Modify the module to solve the cause of those errors, then compile and test +the module. + +.. + _[EXERCISE3-END] + +.. + _[EXERCISE4-BEGIN] + +4. Sub-modules +-------------- + +Inspect the C source files ``mod1.c`` and ``mod2.c`` in :file:`4-multi-mod/`. +Module 2 contains only the definition of a function used by module 1. + +Change the :file:`Kbuild` file to create the ``multi_mod.ko`` module from the +two C source files. + +.. hint:: Read the `Compiling kernel modules`_ section of the lab. + +Compile, copy, boot the VM, load and unload the kernel module. Make sure messages +are properly displayed on the console. + +.. + _[EXERCISE4-END] + +.. + _[EXERCISE5-BEGIN] + +5. Kernel oops +-------------- + +Enter the directory for the task **5-oops-mod** and inspect the +C source file. Notice where the problem will occur. Add the compilation flag +``-g`` in the Kbuild file. + +.. hint:: Read the `Compiling kernel modules`_ section of the lab. + +Compile the corresponding module and load it into the kernel. 
Identify the memory +address at which the oops appeared. + +.. hint:: Read the `Debugging`_ section of the lab. To identify the + address, follow the oops message and extract the value of + the instruction pointer (``EIP``) register. + +Determine which instruction has triggered the oops. + +.. hint:: Use the :file:`/proc/modules` information to get the load address of + the kernel module. Use, on the physical machine, objdump + and/or addr2line. Objdump needs debugging support for + compilation! Read the lab's `objdump`_ and `addr2line`_ + sections. + +Try to unload the kernel module. Notice that the operation does not +work because there are references from the kernel module within the +kernel since the oops; until the release of those references (which is +almost impossible in the case of an oops), the module can not be +unloaded. + +.. + _[EXERCISE5-END] + +.. + _[EXERCISE6-BEGIN] + +6. Module parameters +-------------------- + +Enter the directory for the task **6-cmd-mod** and inspect the C +``cmd_mod.c`` source file. Compile and copy the associated module and +load the kernel module to see the printk message. Then unload the +module from the kernel. + +Without modifying the sources, load the kernel module so that the +message shown is ``Early bird gets tired``. + +.. hint:: The str variable can be changed by passing a parameter to + the module. Find more information `here + `_. + +.. _proc-info: + +.. + _[EXERCISE6-END] + +.. + _[EXERCISE7-BEGIN] + +7. Proc info +------------ + +Check the skeleton for the task named **7-list-proc**. Add code to +display the Process ID (``PID``) and the executable name for the current +process. + +Follow the commands marked with ``TODO``. +The information must be displayed both when loading and unloading the +module. + +.. note:: + * In the Linux kernel, a process is described by the + :c:type:`struct task_struct`. Use |LXR|_ or ``cscope`` to find the + definition of :c:type:`struct task_struct`. 
+ + * To find the structure field that contains the name of the + executable, look for the "executable" comment. + + * The pointer to the structure of the current process + running at a given time in the kernel is given by the + :c:macro:`current` variable (of the type + :c:type:`struct task_struct*`). + +.. hint:: To use :c:macro:`current` you'll need to include the header + in which the :c:type:`struct task_struct` is defined, i.e + ``linux/sched.h``. + +Compile, copy, boot the VM and load the module. Unload the kernel module. + +Repeat the loading/unloading operation. Note that the PIDs of the +displayed processes differ. This is because a process is created +from the executable :file:`/sbin/insmod` when the module is loaded and +when the module is unloaded a process is created from the executable +:file:`/sbin/rmmod`. + +.. + _[EXERCISE7-END] + +.. + _[EXTRA-EXERCISE-BEGIN] + +Extra Exercises +=============== + +1. KDB +------ + +Go to the **8-kdb** directory. Activate KDB over the serial port and enter KDB +mode using :command:`SysRq`. Connect to the pseudo-terminal linked to virtiocon0 +using :command:`minicom`, configure KDB to use the hvc0 serial port: + +.. code-block:: bash + + echo hvc0 > /sys/module/kgdboc/parameters/kgdboc + +and enable it using SysRq (:command:`Ctrl + O g`). +Review the current system status (:command:`help` to see the available KDB +commands). Continue the kernel execution using the :command:`go` command. + +Load the :file:`hello_kdb` module. +The module will simulate a bug when writing to the :file:`/proc/hello_kdb_bug` +file. To simulate a bug, use the below command: + +.. code-block:: bash + + echo 1 > /proc/hello_kdb_bug + +After running the above command, at every oops/panic the kernel stops the +execution and enters debug mode. + +Analyze the stacktrace and determine the code that generated the bug. +How can we find out from KDB the address where the module was loaded? 
+ +In parallel, use GDB in a new window to view the code based on KDB information. + +.. hint:: + Load the symbol file. Use :command:`info line`. + +When writing to :file:`/proc/hello_kdb_break`, the module will increment the +:c:data:`kdb_write_address` variable. Enter KDB and set a breakpoint for each +write access of the :c:data:`kdb_write_address` variable. +Return to kernel to trigger a write using: + +.. code-block:: bash + + echo 1 > /proc/hello_kdb_break + +2. PS Module +------------ + +Update the created kernel module at :ref:`proc-info` in order to display +information about all the processes in the system, when inserting the kernel +module, not just about the current process. Afterwards, compare the obtained +result with the output of the :command:`ps` command. + +.. hint:: + * Processes in the system are structured in a circular list. + + * :c:macro:`for_each _...` macros (such as :c:macro:`for_each_process`) are + useful when you want to navigate the items in a list. + + * To understand how to use a feature or a macro, use |LXR|_ or Vim and + :command:`cscope` and search for usage scenarios. + +3. Memory Info +-------------- + +Create a kernel module that displays the virtual memory areas of the current +process; for each memory area it will display the start address and the end +address. + +.. hint:: + * Start from an existing kernel module. + + * Investigate the structures :c:type:`struct task_struct`, + :c:type:`struct mm_struct` and :c:type:`struct vm_area_struct`. A + memory area is indicated by a structure of type :c:type:`struct + vm_area_struct`. + + * Don't forget to include the headers where the necessary structures are + defined. + +4. Dynamic Debugging +-------------------- + +Go to the **9-dyndbg** directory and compile the :code:`dyndbg.ko` module. + +Familiarize yourself with the :code:`debugfs` file system mounted in +:file:`/debug` and analyze the contents of the file +:file:`/debug/dynamic_debug/control`. 
Insert the :code:`dyndbg.ko` module and +notice the new content of the :file:`dynamic_debug/control` file. + +What appears extra in the respective file? Run the following command: + +.. code-block:: bash + + grep dyndbg /debug/dynamic_debug/control + +Configure :command:`dyndbg` so that only messages marked as "Important" in +:c:func:`my_debug_func` function are displayed when the module is unloaded. +The exercise will only filter out the :c:func:`pr_debug` calls; :c:func:`printk` +calls being always displayed. + +Specify two ways to filter. + +.. hint:: + Read the `Dynamic debugging`_ section and look at the :command:`dyndbg` + options (for example, :command:`line`, :command:`format`). + +Perform the filtering and revise the :file:`dynamic_debug/control` file. What +has changed? How do you know which calls are activated? + +.. hint:: + Check the :command:`dyndbg` flags. Unload the kernel module and observe the + log messages. + +5. Dynamic Debugging During Initialization +------------------------------------------ + +As you have noticed, :c:func:`pr_debug` calls can only be activated /filtered +after module insertion. In some situations, it might be helpful to view the +messages from the initialization of the module. This can be done by using a +default (fake) parameter called :command:`dyndbg` that can be passed as an +argument to initialize the module. With this parameter you can add /delete +:command:`dyndbg` flags. + +.. hint:: + Read the last part of the `Dynamic debugging`_ section and see the available + flags (e.g.: :command:`+/- p`). + +Read the `Debug Messages section at Module Initialization Time +`_ +and insert the module so that the messages in :c:func:`my_debug_func` (called +:c:func:`dyndbg_init`) are also displayed during initialization. + +.. warning:: + In the VM from the lab, you will need to use :command:`insmod` instead of + :command:`modprobe`. + +Without unloading the module, deactivate :c:func:`pr_debug` calls. + +.. 
hint:: + You can delete the set flags. Unload the kernel module. + +.. + _[EXTRA-EXERCISE-END] diff --git a/Documentation/teaching/labs/kernel_profiling.rst b/Documentation/teaching/labs/kernel_profiling.rst new file mode 100644 index 00000000000000..34c3810ee9420b --- /dev/null +++ b/Documentation/teaching/labs/kernel_profiling.rst @@ -0,0 +1,474 @@ +================ +Kernel Profiling +================ + +Lab Objectives +============== + + * Familiarize yourself with the basics of Linux kernel profiling + * Understanding basic profiling tools + * Learning profiling methodologies and good practices + +Overview +======== + +Up until now we have studied how the different components of the Linux kernel +work, and how to write drivers that interface with them in order to provide +support for devices or protocols. This has helped us understand how the Linux +kernel works, but most people will not get to write kernel drivers. + +Nonetheless, the skills learned will help us to write applications that better +integrate with the whole operating system. In order to do this, one has to have +a good view of both the user space and the kernel space. + +This session aims to merge the work we have done up until now in the kernel +space with real world use cases where we do not write kernel space code, but we +look through the kernel using profiling tools, in order to debug issues that +we're having when writing regular, low-level, applications. + +Another focus of this session will be learning a general methodology for +debugging software issues, and we will approach some tools that give us insight +from the kernel on the way our application runs. + +Profiling Tools +=============== + +The main tool that we will focus our attention on is ``perf``, which offers +support for tracing applications, and also inspecting general aspects of the +system. 
We will also be using debugging tools that most people have used in +their day to day life, such as ``htop``, ``ps``, ``lsof`` and others. + +perf +---- + +``perf`` is a tool that instruments the CPU using +tracepoints, kprobes and uprobes. This tool allows us to take a look at what +functions are being called at a given point. This allows us to take a peek at +where the kernel is spending the most time, print out call stacks of functions, +and in general log what the CPU is running. + +``perf`` integrates modules such as: +* static tracing +* dynamic tracing +* resource monitoring + +The tracing interface that is offered by perf can be used by itself, using the +``perf`` command together with its subcommands. + + +.. code-block:: bash + + root@qemux86:~# ./skels/kernel_profiling/perf + + usage: perf [--version] [--help] [OPTIONS] COMMAND [ARGS] + + The most commonly used perf commands are: + annotate Read perf.data (created by perf record) and display annotated code + archive Create archive with object files with build-ids found in perf.data file + bench General framework for benchmark suites + buildid-cache Manage build-id cache. + buildid-list List the buildids in a perf.data file + c2c Shared Data C2C/HITM Analyzer. + config Get and set variables in a configuration file.
+ data Data file related processing + diff Read perf.data files and display the differential profile + evlist List the event names in a perf.data file + ftrace simple wrapper for kernel's ftrace functionality + inject Filter to augment the events stream with additional information + kallsyms Searches running kernel for symbols + kmem Tool to trace/measure kernel memory properties + kvm Tool to trace/measure kvm guest os + list List all symbolic event types + lock Analyze lock events + mem Profile memory accesses + record Run a command and record its profile into perf.data + report Read perf.data (created by perf record) and display the profile + sched Tool to trace/measure scheduler properties (latencies) + script Read perf.data (created by perf record) and display trace output + stat Run a command and gather performance counter statistics + test Runs sanity tests. + timechart Tool to visualize total system behavior during a workload + top System profiling tool. + version display the version of perf binary + probe Define new dynamic tracepoints + + See 'perf help COMMAND' for more information on a specific command. + +In the output above we can see all of perf's subcommands together with a +description of their functionality, the most significant of which are: + +* ``stat`` - displays statistics such as the number of context switches and page + faults; +* ``top`` - an interactive interface where we can inspect the most frequent + function calls and their caller. This interface allows us direct feedback + while profiling; +* ``list`` - lists the static trace point that we can instrument inside the + kernel. These are useful when trying to get an insight from inside the kernel; +* ``probe`` - add a dynamic trace point that instruments a function call in + order to be recorded by perf; +* ``record`` - records function calls and stack traces based on tracing points + defined by the user; It can also record specific function calls and their + stack traces. 
The record is saved in a file, named ``perf.data`` by default; +* ``report`` - displays the information saved in a perf recording. + +Another way to use perf's interface is through scripts that wrap over perf that +offer a higher level way of looking at events or data, without needing to know +the intricacies of the command. An example of this is the ``iosnoop.sh`` script, +which displays what I/O transfers are taking place. + +ps +-- + +``ps`` is the Linux tool that allows us to monitor the processes that are +running at a given time on the machine, including the kernel threads. This is a +simple and easy to use way of checking at a glance what processes are running on +the CPU, and what is their CPU and memory usage. + +In order to list all the processes running, we use the ``ps aux`` command in the +following way: + +.. code-block:: bash + + TODO + root@qemux86:~/skels/kernel_profiling/0-demo# cd + root@qemux86:~# ps aux + USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND + root 1 0.0 0.5 2004 1256 ? Ss 12:06 0:12 init [5] + root 2 0.0 0.0 0 0 ? S 12:06 0:00 [kthreadd] + [...] + root 350 4.5 4.4 11132 10688 hvc0 T 12:07 17:21 ./io-app + root 1358 0.0 0.0 0 0 ? I 14:30 0:00 [kworker/u2:1-e + root 2293 0.1 1.5 5516 3704 ? Ss 18:18 0:00 sshd: root@pts/ + root 2295 0.0 1.3 3968 3232 pts/0 Ss+ 18:19 0:00 -sh + root 2307 0.0 0.0 0 0 ? I 18:19 0:00 [kworker/u2:2-e + root 2350 0.0 0.7 3032 1792 hvc0 R+ 18:26 0:00 ps aux + root 2392 2.6 0.0 0 0 ? D 18:31 0:00 test-script + +One piece of information of note is that the 8th column represents the state of the +process, ``S`` meaning suspended, ``D`` suspended due to I/O, and ``R`` meaning +running. + +time +---- + +The ``time`` command allows us to inspect the amount of time spent by a +process in I/O, running the application code, or running code in kernel space.
+This can be useful in order to find out whether an application's issue comes +from running too much in kernel space, so it has some overhead when it does +system calls, or the issue is in the user code. + +.. code-block:: bash + + root@qemux86:~# time dd if=/dev/urandom of=./test-file bs=1K count=10 + 10+0 records in + 10+0 records out + 10240 bytes (10 kB, 10 KiB) copied, 0.00299749 s, 3.4 MB/s + + real 0m0.020s + user 0m0.001s + sys 0m0.015s + +In the output above we timed the generation of a file using ``dd``. The result +of the timing is displayed at the bottom of the output. The values outputted by the +tool are the following: + +* ``real`` - the amount of time that has passed from the start of the application to + its finishing; +* ``user`` - time spent running the ``dd`` code; +* ``sys`` - time spent running kernel code on behalf of the process. + +We see that the sum of the ``user`` and ``sys`` values doesn't add up to the +``real`` value. This happens either when the application runs on multiple cores, +in which case the sum might be higher, or the application sleeps, in which case +the sum is lower. + +top +--- + +``top`` is an application that is found on most systems which lists in real time +the applications that are running on the system. ``top`` runs interactively, and +it auto-refreshes its output, as opposed to ``ps``. We use this tool when we +want a high level of continuous monitoring. + +Profiling Methodology +===================== + +When doing profiling, our goal is to identify the cause of a problem. Usually +this problem is observed by someone when their application doesn't work as +expected. When we say that an application did not work as expected, this can +mean different things for different people. For example, one person might +complain that the application has a slowdown, while another might say that the +application runs on the CPU, but it doesn't output anything.
+ +The first step in any problem solving context is to understand the default +behaviour of the application we're trying to debug, and to make sure that it is +now not running within the expected parameters. + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: kernel_profiling + +.. note:: + + This session will require us to use the ``perf`` tracing tool. When running + natively on our systems, we have to install the + ``linux-tools-<version>-generic`` package using a package manager in order + to run it. Because in our virtual machine we don't have access to a package + manager, we will be downloading the ``perf`` binary from `this + `_ link. Download the application in + the ``skels/kernel_profiling`` directory, and grant it execution + permissions. + +.. warning:: + + When running ``perf``, make sure that you're running the downloaded version, + not the version in the ``PATH`` variable. + +.. note:: + + When going through this session's exercises, we will have to run commands in + parallel. In order to do this, we will have to connect to the virtual machine + using SSH. We recommend using the ``core-image-sato-sdk-qemu`` image, since it + has the tools that we need. To run the virtual machine using the + ``core-image-sato-sdk-qemu`` file system, uncomment line 16 in the + ``qemu/Makefile`` file. + +.. note:: + + If you wish to run the ``perf-tools`` based scripts that we have included in + the repository, such as ``iosnoop.sh``, you will have to grant them execution + privileges, in order to be copied to the virtual machine file system. + +.. note:: + + In order to improve the course of SO2, its components and the way it is + conducted, your opinions are very useful to us. Please fill the feedback form + on `curs.upb.ro platform `_. + + The form is anonymous and is active between May 22 and June 2, 2023. The + results will be visible to the SO2 team after all the grades have been + marked.
+ + We invite you to evaluate the activity of the SO2 team and specify its + strengths and weaknesses and your suggestions for improving the subject. + Your feedback is very important to us to increase the quality of the subject + in the coming years. + + We are particularly interested in: + + * What did you not like and what do you think did not go well? + * Why didn't you like it and why do you think it didn't go well? + * What should we do to make things better? + +0. Demo: Profiling I/O Problems +=============================== + +When working with I/O, we have to keep in mind that it is one of the slowest +systems in the operating system, compared to memory, which is an order of +magnitude faster, and scheduling, which deals with what is currently running on +the CPU. + +Because of this, I/O operations have to be thought out, because you might starve +your application by saturating the system with requests. Another issue that you +might face is that the I/O's slow speed might affect your application's +responsiveness, if it waits for the I/O operations to finish. + +Let's take a look at an application and debug its issues. + +We are going to run the ``io-app`` application, from the ``0-demo`` directory. + +In order to inspect what is running on the CPU, and look at the stack of the +process, we can use the ``perf record`` subcommand in the following way: + +.. code-block:: bash + + root@qemux86:~# ./perf record -a -g + Couldn't synthesize bpf events. + ^C[ perf record: Woken up 7 times to write data ] + [ perf record: Captured and wrote 1.724 MB perf.data (8376 samples) ] + + +perf will record values indefinitely, but we can close it using the ``Ctrl+c`` +hotkey. We used the ``-a`` option in order to probe all CPUs, and the ``-g`` option, +which records the whole call stack.
+ +To visualize the recorded information, we will use the ``perf report`` command, +which will bring up a pager which will display the most frequent function calls +that were found on the CPU, and their call stack. + +.. code-block:: bash + + root@qemux86:~# ./perf report --header -F overhead,comm,parent + # Total Lost Samples: 0 + # + # Samples: 8K of event 'cpu-clock:pppH' + # Event count (approx.): 2094000000 + # + # Overhead Command Parent symbol + # ........ ............... ............. + # + 58.63% io-app [other] + | + --58.62%--__libc_start_main + main + __kernel_vsyscall + | + --58.61%--__irqentry_text_end + do_SYSENTER_32 + do_fast_syscall_32 + __noinstr_text_start + __ia32_sys_write + ksys_write + vfs_write + | + --58.60%--ext4_file_write_iter + ext4_buffered_write_iter + [...] + +We have used the ``--header`` option in order to print the table header, and ``-F +overhead,comm,parent`` in order to print the overhead (the percentage of time spent in the call +stack), the command and the caller. + +We can see that the ``io-app`` command is doing some writes in the file system, +and this contributes to much of the load on the system. + +Armed with this information, we know that there are many I/O calls being done by +the application. In order to look at the size of these requests, we can use the +``iosnoop.sh`` script in order to see how big these requests are. + +.. code-block:: bash + + root@qemux86:~/skels/kernel_profiling# ./iosnoop.sh 1 + Tracing block I/O. Ctrl-C to end.
+ COMM PID TYPE DEV BLOCK BYTES LATms + io-app 889 WS 254,0 4800512 1310720 2.10 + io-app 889 WS 254,0 4803072 1310720 2.04 + io-app 889 WS 254,0 4805632 1310720 2.03 + io-app 889 WS 254,0 4808192 1310720 2.43 + io-app 889 WS 254,0 4810752 1310720 3.48 + io-app 889 WS 254,0 4813312 1310720 3.46 + io-app 889 WS 254,0 4815872 524288 1.03 + io-app 889 WS 254,0 5029888 1310720 5.82 + io-app 889 WS 254,0 5032448 786432 5.80 + jbd2/vda-43 43 WS 254,0 2702392 8192 0.22 + kworker/0:1H 34 WS 254,0 2702408 4096 0.40 + io-app 889 WS 254,0 4800512 1310720 2.60 + io-app 889 WS 254,0 4803072 1310720 2.58 + [...] + +From this output we see that the ``io-app`` is writing in a loop from the fact +that the first block ``4800512`` is repeating, and that it is doing big writes, +since it is writing about one megabyte per request. This constant looping adds the +load to the system that we're experiencing. + +1. Investigating Reduced Responsiveness +--------------------------------------- + +The ``io.ko`` module, located in the ``kernel_profiling/1-io`` directory, +decreases the system's responsiveness when inserted. We see that the command +line stutters when typing commands, but when running top, we see that the +system's load is not high, and there aren't any processes that are hogging +resources. + +Find out what the ``io.ko`` module is doing and why it is leading to the +stuttering effect that we experience. + +.. hint:: + + Trace all the functions being called and check where the CPU is + spending most of its time. In order to do this, you can run either ``perf + record`` and ``perf report`` to view the output, or ``perf top``. + +2. Launching New Threads +------------------------ + +We want to run the same function in a loop 100 times in parallel. We have +implemented two solutions inside the ``scheduling`` binary file, located in the +``kernel_profiling/2-scheduling`` directory. + +When executing the ``scheduling`` binary, it prints a message in parallel from +100 running instances.
We can tune this execution by running the application +either with the first parameter ``0`` or ``1``. + +Find out which solution is better, and why. + +3. Tuning ``cp`` +---------------- + +Our goal is to write a copy of the ``cp`` tool integrated in Linux, which has +been implemented by the ``memory`` binary, in the ``kernel_profiling/3-memory`` +directory. It implements two approaches that we can take for the copy operation: + +* reading the contents of the source file in a buffer in memory using the + ``read()`` system call, and writing that buffer to the destination file using + the ``write()`` system call; +* mapping the source and destination files to memory using the ``mmap`` system + call, and copying the contents of the source file to the destination in + memory. + +Another tunable parameter that we're going to use is the block size of the copies +that we're going to make, either through reads/writes or in memory. + +1) Investigate which of the two copying mechanisms is faster. For this step, you +will use the 1024 block size. + +2) Once you have found which copying mechanism is faster, change the block size +parameter and see which value gives you the best copies. Why? + +4. I/O Latency +-------------- + +We have written a module that reads the content of a disk. When we insert the ``bio.ko`` +module, located in the ``4-bio`` directory, we see a large spike in the system's +load, as can be seen in the ``top`` command, but we see that the system is still +responsive. + +Investigate what is causing the increased load to the system. Is it an I/O issue, +or is it a scheduling issue? + +.. hint:: + + Try to trace the I/O operations using ``perf``, or use the + ``iosnoop.sh`` script in order to inspect what I/O is happening at a + certain point. + +5. Bad ELF +---------- + +.. note:: + + This is a bonus exercise that has been tested on a native Linux system. + It may run under the QEMU virtual machine, but the behavior was weird in our testing.
+ We recommend you use a native (or VirtualBox or VMware) Linux system. + +We managed to build (as part of a `Unikraft `__ build) an ELF file that is valid when doing static analysis, but that can't be executed. +The file is ``bad_elf``, located in the ``5-bad-elf/`` folder. + +Running it triggers a *segmentation fault* message. +Running it using ``strace`` shows an error with ``execve()``. + +.. code:: + + ... skels/kernel_profiling/5-bad-elf$ ./bad_elf + Segmentation fault + + ... skels/kernel_profiling/5-bad-elf$ strace ./bad_elf + execve("./bad_elf", ["./bad_elf"], 0x7ffc3349ba50 /* 70 vars \*/) = -1 EINVAL (Invalid argument) + --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=NULL} --- + +++ killed by SIGSEGV +++ + Segmentation fault (core dumped) + +The ELF file itself is valid: + +.. code:: + + ... skels/kernel_profiling/5-bad-elf$ readelf -a bad_elf + +The issue is to be detected in the kernel. + +Use either ``perf``, or, better yet `ftrace `__ to inspect the kernel function calls done by the program. +Identify the function call that sends out the ``SIGSEGV`` signal. +Identify the cause of the issue. +Find that cause in the `manual page elf(5) `__. diff --git a/Documentation/teaching/labs/memory_mapping.rst b/Documentation/teaching/labs/memory_mapping.rst new file mode 100644 index 00000000000000..e9d15e152c18ec --- /dev/null +++ b/Documentation/teaching/labs/memory_mapping.rst @@ -0,0 +1,499 @@ +============== +Memory mapping +============== + +Lab objectives +============== + +* Understand address space mapping mechanisms +* Learn about the most important structures related to memory management + +Keywords: + +* address space +* :c:func:`mmap` +* :c:type:`struct page` +* :c:type:`struct vm_area_struct` +* :c:type:`struct vm_struct` +* :c:func:`remap_pfn_range` +* :c:func:`SetPageReserved` +* :c:func:`ClearPageReserved` + + +Overview +======== + +In the Linux kernel it is possible to map a kernel address space to a +user address space.
This eliminates the overhead of copying user space +information into the kernel space and vice versa. This can be done +through a device driver and the user space device interface +(:file:`/dev`). + +This feature can be used by implementing the :c:func:`mmap` operation +in the device driver's :c:type:`struct file_operations` and using the +:c:func:`mmap` system call in user space. + +The basic unit for virtual memory management is a page, which size is +usually 4K, but it can be up to 64K on some platforms. Whenever we +work with virtual memory we work with two types of addresses: virtual +address and physical address. All CPU access (including from kernel +space) uses virtual addresses that are translated by the MMU into +physical addresses with the help of page tables. + +A physical page of memory is identified by the Page Frame Number +(PFN). The PFN can be easily computed from the physical address by +dividing it with the size of the page (or by shifting the physical +address with PAGE_SHIFT bits to the right). + +.. image:: ../res/paging.png + :width: 49 % + +For efficiency reasons, the virtual address space is divided into +user space and kernel space. For the same reason, the kernel space +contains a memory mapped zone, called **lowmem**, which is contiguously +mapped in physical memory, starting from the lowest possible physical +address (usually 0). The virtual address where lowmem is mapped is +defined by :c:macro:`PAGE_OFFSET`. + +On a 32bit system, not all available memory can be mapped in lowmem and +because of that there is a separate zone in kernel space called +**highmem** which can be used to arbitrarily map physical memory. + +Memory allocated by :c:func:`kmalloc` resides in lowmem and it is +physically contiguous. Memory allocated by :c:func:`vmalloc` is not +contiguous and does not reside in lowmem (it has a dedicated zone in +highmem). + +.. 
image:: ../res/kernel-virtmem-map.png + :width: 49 % + +Structures used for memory mapping +================================== + +Before discussing about the memory mapping mechanism over a device, +we will present some of the basic structures used by the Linux memory +management subsystem. +Some of the basic structures are: :c:type:`struct page`, +:c:type:`struct vm_area_struct`, :c:type:`struct mm_struct`. + +:c:type:`struct page` +--------------------- + +:c:type:`struct page` is used to embed information about all physical +pages in the system. The kernel has a :c:type:`struct page` structure +for all pages in the system. + +There are many functions that interact with this structure: + +* :c:func:`virt_to_page` returns the page associated with a virtual + address +* :c:func:`pfn_to_page` returns the page associated with a page frame + number +* :c:func:`page_to_pfn` return the page frame number associated with a + :c:type:`struct page` +* :c:func:`page_address` returns the virtual address of a + :c:type:`struct page`; this functions can be called only for pages from + lowmem +* :c:func:`kmap` creates a mapping in kernel for an arbitrary physical + page (can be from highmem) and returns a virtual address that can be + used to directly reference the page + +:c:type:`struct vm_area_struct` +------------------------------- + +:c:type:`struct vm_area_struct` holds information about a contiguous +virtual memory area. The memory areas of a process can be viewed by +inspecting the *maps* attribute of the process via procfs: + +.. 
code-block:: shell + + root@qemux86:~# cat /proc/1/maps + #address perms offset device inode pathname + 08048000-08050000 r-xp 00000000 fe:00 761 /sbin/init.sysvinit + 08050000-08051000 r--p 00007000 fe:00 761 /sbin/init.sysvinit + 08051000-08052000 rw-p 00008000 fe:00 761 /sbin/init.sysvinit + 092e1000-09302000 rw-p 00000000 00:00 0 [heap] + 4480c000-4482e000 r-xp 00000000 fe:00 576 /lib/ld-2.25.so + 4482e000-4482f000 r--p 00021000 fe:00 576 /lib/ld-2.25.so + 4482f000-44830000 rw-p 00022000 fe:00 576 /lib/ld-2.25.so + 44832000-449a9000 r-xp 00000000 fe:00 581 /lib/libc-2.25.so + 449a9000-449ab000 r--p 00176000 fe:00 581 /lib/libc-2.25.so + 449ab000-449ac000 rw-p 00178000 fe:00 581 /lib/libc-2.25.so + 449ac000-449af000 rw-p 00000000 00:00 0 + b7761000-b7763000 rw-p 00000000 00:00 0 + b7763000-b7766000 r--p 00000000 00:00 0 [vvar] + b7766000-b7767000 r-xp 00000000 00:00 0 [vdso] + bfa15000-bfa36000 rw-p 00000000 00:00 0 [stack] + +A memory area is characterized by a start address, a stop address, +length, permissions. + +A :c:type:`struct vm_area_struct` is created at each :c:func:`mmap` +call issued from user space. A driver that supports the :c:func:`mmap` +operation must complete and initialize the associated +:c:type:`struct vm_area_struct`. The most important fields of this +structure are: + +* :c:member:`vm_start`, :c:member:`vm_end` - the beginning and the end of + the memory area, respectively (these fields also appear in + :file:`/proc//maps`); +* :c:member:`vm_file` - the pointer to the associated file structure (if any); +* :c:member:`vm_pgoff` - the offset of the area within the file; +* :c:member:`vm_flags` - a set of flags; +* :c:member:`vm_ops` - a set of working functions for this area +* :c:member:`vm_next`, :c:member:`vm_prev` - the areas of the same process + are chained by a list structure + +:c:type:`struct mm_struct` +-------------------------- + +:c:type:`struct mm_struct` encompasses all memory areas associated +with a process. 
The :c:member:`mm` field of :c:type:`struct task_struct` +is a pointer to the :c:type:`struct mm_struct` of the current process. + + +Device driver memory mapping +============================ + +Memory mapping is one of the most interesting features of a Unix +system. From a driver's point of view, the memory-mapping facility +allows direct memory access to a user space device. + +To assign a :c:func:`mmap` operation to a driver, the :c:member:`mmap` +field of the device driver's :c:type:`struct file_operations` must be +implemented. If that is the case, the user space process can then use +the :c:func:`mmap` system call on a file descriptor associated with +the device. + +The mmap system call takes the following parameters: + +.. code-block:: c + + void *mmap(caddr_t addr, size_t len, int prot, + int flags, int fd, off_t offset); + +To map memory between a device and user space, the user process must +open the device and issue the :c:func:`mmap` system call with the resulting +file descriptor. + +The device driver :c:func:`mmap` operation has the following signature: + +.. code-block:: c + + int (*mmap)(struct file *filp, struct vm_area_struct *vma); + +The *filp* field is a pointer to a :c:type:`struct file` created when +the device is opened from user space. The *vma* field is used to +indicate the virtual address space where the memory should be mapped +by the device. A driver should allocate memory (using +:c:func:`kmalloc`, :c:func:`vmalloc`, :c:func:`alloc_pages`) and then +map it to the user address space as indicated by the *vma* parameter +using helper functions such as :c:func:`remap_pfn_range`. + +:c:func:`remap_pfn_range` will map a contiguous physical address space +into the virtual space represented by :c:type:`vm_area_struct`: + +.. 
code-block:: c + + int remap_pfn_range (struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t prot); + +:c:func:`remap_pfn_range` expects the following parameters: + +* *vma* - the virtual memory space in which mapping is made; +* *addr* - the virtual address space from where remapping begins; page + tables for the virtual address space between addr and addr + size + will be formed as needed +* *pfn* - the page frame number to which the virtual address should be + mapped +* *size* - the size (in bytes) of the memory to be mapped +* *prot* - protection flags for this mapping + +Here is an example of using this function that contiguously maps the +physical memory starting at page frame number *pfn* (memory that was +previously allocated) to the *vma->vm_start* virtual address: + +.. code-block:: c + + struct vm_area_struct *vma; + unsigned long len = vma->vm_end - vma->vm_start; + int ret ; + + ret = remap_pfn_range(vma, vma->vm_start, pfn, len, vma->vm_page_prot); + if (ret < 0) { + pr_err("could not map the address area\n"); + return -EIO; + } + +To obtain the page frame number of the physical memory we must +consider how the memory allocation was performed. For each +:c:func:`kmalloc`, :c:func:`vmalloc`, :c:func:`alloc_pages`, we must +use a different approach. For :c:func:`kmalloc` we can use something +like: + +.. code-block:: c + + static char *kmalloc_area; + + unsigned long pfn = virt_to_phys((void *)kmalloc_area)>>PAGE_SHIFT; + +while for :c:func:`vmalloc`: + +.. code-block:: c + + static char *vmalloc_area; + + unsigned long pfn = vmalloc_to_pfn(vmalloc_area); + +and finally for :c:func:`alloc_pages`: + +.. code-block:: c + + struct page *page; + + unsigned long pfn = page_to_pfn(page); + +..
attention:: Note that memory allocated with :c:func:`vmalloc` is not + physically contiguous so if we want to map a range allocated + with :c:func:`vmalloc`, we have to map each page individually + and compute the physical address for each page. + +Since the pages are mapped to user space, they might be swapped +out. To avoid this we must set the PG_reserved bit on the page. +Enabling is done using :c:func:`SetPageReserved` while resetting it +(which must be done before freeing the memory) is done with +:c:func:`ClearPageReserved`: + +.. code-block:: c + + void *alloc_mmap_pages(int npages) + { + int i; + char *mem = kmalloc(PAGE_SIZE * npages, GFP_KERNEL); + + if (!mem) + return mem; + + for(i = 0; i < npages * PAGE_SIZE; i += PAGE_SIZE) + SetPageReserved(virt_to_page(((unsigned long)mem) + i)); + + return mem; + } + + void free_mmap_pages(void *mem, int npages) + { + int i; + + for(i = 0; i < npages * PAGE_SIZE; i += PAGE_SIZE) + ClearPageReserved(virt_to_page(((unsigned long)mem) + i)); + + kfree(mem); + } + + +Further reading +=============== + +* `Linux Device Drivers 3rd Edition - Chapter 15. Memory Mapping and DMA `_ +* `Linux Device Driver mmap Skeleton `_ +* `Driver porting: supporting mmap () `_ +* `Device Drivers Concluded `_ +* `mmap `_ + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: memory_mapping + +1. Mapping contiguous physical memory to userspace +-------------------------------------------------- + +Implement a device driver that maps contiguous physical memory +(e.g. obtained via :c:func:`kmalloc`) to userspace. + +Review the `Device driver memory mapping`_ section, generate the +skeleton for the task named **kmmap** and fill in the areas marked +with **TODO 1**. + +Start by allocating a memory area of NPAGES+2 pages using :c:func:`kmalloc` +in the module init function and find the first address in the area that is +aligned to a page boundary. + +.. hint:: The size of a page is *PAGE_SIZE*.
+ + Store the allocated area in *kmalloc_ptr* and the page + aligned address in *kmalloc_area*: + + Use :c:func:`PAGE_ALIGN` to determine *kmalloc_area*. + +Enable the PG_reserved bit of each page with +:c:func:`SetPageReserved`. Clear the bit with +:c:func:`ClearPageReserved` before freeing the memory. + +.. hint:: Use :c:func:`virt_to_page` to translate virtual pages into + physical pages, as required by :c:func:`SetPageReserved` + and :c:func:`ClearPageReserved`. + +For verification purpose (using the test below), fill in the first 4 +bytes of each page with the following values: 0xaa, 0xbb, 0xcc, 0xdd. + +Implement the :c:func:`mmap` driver function. + +.. hint:: For mapping, use :c:func:`remap_pfn_range`. The third + argument for :c:func:`remap_pfn_range` is a page frame number (PFN). + + To convert from virtual kernel address to physical address, + use :c:func:`virt_to_phys`. + + To convert a physical address to its PFN, shift the address + with PAGE_SHIFT bits to the right. + +For testing, load the kernel module and run: + +.. code-block:: shell + + root@qemux86:~# skels/memory_mapping/test/mmap-test 1 + +If everything goes well, the test will show "matched" messages. + +2. Mapping non-contiguous physical memory to userspace +------------------------------------------------------ + +Implement a device driver that maps non-contiguous physical memory +(e.g. obtained via :c:func:`vmalloc`) to userspace. + +Review the `Device driver memory mapping`_ section, generate the +skeleton for the task named **vmmap** and fill in the areas marked +with **TODO 1**. + +Allocate a memory area of NPAGES with :c:func:`vmalloc`. + +.. hint:: The size of a page is *PAGE_SIZE*. + Store the allocated area in *vmalloc_area*. + Memory allocated by :c:func:`vmalloc` is paged aligned. + +Enable the PG_reserved bit of each page with +:c:func:`SetPageReserved`. Clear the bit with +:c:func:`ClearPageReserved` before freeing the memory. + +.. 
hint:: Use :c:func:`vmalloc_to_page` to translate virtual pages + into physical pages used by the functions + :c:func:`SetPageReserved` and :c:func:`ClearPageReserved`. + +For verification purpose (using the test below), fill in the first 4 +bytes of each page with the following values: 0xaa, 0xbb, 0xcc, 0xdd. + +Implement the mmap driver function. + +.. hint:: To convert from virtual vmalloc address to physical address, + use :c:func:`vmalloc_to_pfn` which returns a PFN directly. + +.. attention:: vmalloc pages are not physically contiguous so it is + needed to use :c:func:`remap_pfn_range` for each page. + + Loop through all virtual pages and for each: + * determine the physical address + * map it with :c:func:`remap_pfn_range` + + Make sure that you determine the physical address + each time and that you use a range of one page for mapping. + +For testing, load the kernel module and run: + +.. code-block:: shell + + root@qemux86:~# skels/memory_mapping/test/mmap-test 1 + +If everything goes well, the test will show "matched" messages. + +3. Read / write operations in mapped memory +------------------------------------------- + +Modify one of the previous modules to allow read / write operations on +your device. This is a didactic exercise to see that the same space +can also be used with the :c:func:`mmap` call and with :c:func:`read` +and :c:func:`write` calls. + +Fill in areas marked with **TODO 2**. + +.. note:: The offset parameter sent to the read / write operation can + be ignored as all reads / writes from the test program will + be done with 0 offsets. + +For testing, load the kernel module and run: + +.. code-block:: shell + + root@qemux86:~# skels/memory_mapping/test/mmap-test 2 + + +4. Display memory mapped in procfs +---------------------------------- + +Using one of the previous modules, create a procfs file in which you +display the total memory mapped by the calling process. + +Fill in the areas marked with **TODO 3**. 
+ +Create a new entry in procfs (:c:macro:`PROC_ENTRY_NAME`, defined in +:file:`mmap-test.h`) that will show the total memory mapped by the process +that called the :c:func:`read` on that file. + +.. hint:: Use :c:func:`proc_create`. For the mode parameter, use 0, + and for the parent parameter use NULL. Use + :c:func:`my_proc_file_ops` for operations. + +In the module exit function, delete the :c:macro:`PROC_ENTRY_NAME` entry +using :c:func:`remove_proc_entry`. + +.. note:: A (complex) use and description of the :c:type:`struct + seq_file` interface can be found in this `example + `_ . + + For this exercise, just a simple use of the interface + described `here `_ is + sufficient. Check the "extra-simple" API described there. + +In the :c:func:`my_seq_show` function you will need to: + +* Obtain the :c:type:`struct mm_struct` structure of the current process + using the :c:func:`get_task_mm` function. + + .. hint:: The current process is available via the *current* variable + of type :c:type:`struct task_struct*`. + +* Iterate through the entire :c:type:`struct vm_area_struct` list + associated with the process. + + .. hint:: Use the variable :c:data:`vma_iterator` and start from + :c:data:`mm->mmap`. Use the :c:member:`vm_next` field of + the :c:type:`struct vm_area_struct` to navigate through + the list of memory areas. Stop when you reach :c:macro:`NULL`. + +* Use *vm_start* and *vm_end* for each area to compute the total size. + +* Use :c:func:`pr_info("%lx %lx\n", ...)` to print *vm_start* and *vm_end* for + each area. + +* To release :c:type:`struct mm_struct`, decrement the reference + counter of the structure using :c:func:`mmput`. + +* Use :c:func:`seq_printf` to write to the file. Show only the total count, + no other messages. Do not even print a newline (``\n``). + +In :c:func:`my_seq_open` register the display function +(:c:func:`my_seq_show`) using :c:func:`single_open`. + +.. note:: :c:func:`single_open` can use :c:macro:`NULL` as its third argument.
+ +For testing, load the kernel module and run: + +.. code-block:: shell + + root@qemux86:~# skels/memory_mapping/test/mmap-test 3 + +.. note:: The test waits for a while (it has an internal sleep + instruction). As long as the test waits, use the + :command:`pmap` command in another console to see the + mappings of the test and compare those to the test results. diff --git a/Documentation/teaching/labs/networking.rst b/Documentation/teaching/labs/networking.rst new file mode 100644 index 00000000000000..3eab836d5a3285 --- /dev/null +++ b/Documentation/teaching/labs/networking.rst @@ -0,0 +1,1262 @@ +============================ +Networking +============================ + +Lab objectives +============== + + * Understanding the Linux kernel networking architecture + * Acquiring practical IP packet management skills using a packet filter or + firewall + * Familiarize yourself with how to use sockets at the Linux kernel level + +Overview +======== + +The development of the Internet has led to an exponential increase in network +applications and, as a consequence, to increasing the speed and productivity +requirements of an operating system's networking subsystem. The networking +subsystem is not an essential component of an operating system kernel (the Linux +kernel can be compiled without networking support). It is, however, quite +unlikely for a computing system (or even an embedded device) to have a +non-networked operating system due to the need for connectivity. Modern operating +systems use the `TCP/IP stack +`_. Their kernel +implements protocols up to the transport layer, while application layer protocols +are typically implemented in user space (HTTP, FTP, SSH, etc.). + +Networking in user space +------------------------ + +In user space the abstraction of network communication is the socket. The +socket abstracts a communication channel and is the kernel-based TCP/IP stack +interaction interface. 
An IP socket is associated with an IP address, the +transport layer protocol used (TCP, UDP etc) and a port. Common function calls +that use sockets are: creation (``socket``), initialization +(``bind``), connecting (``connect``), waiting for a connection +(``listen``, ``accept``), closing a socket (``close``). + +Network communication is accomplished via ``read``/``write`` or ``recv``/``send`` calls +for TCP sockets and ``recvfrom``/``sendto`` for UDP sockets. Transmission and +reception operations are transparent to the application, leaving encapsulation +and transmission over network at the kernel's discretion. However, it is +possible to implement the TCP/IP stack in user space using raw sockets (the +``PF_PACKET`` option when creating a socket), or implementing an application +layer protocol in kernel (`TUX web server +`_). + +For more details about user space programming using sockets, see `Beej's Guide to +Network Programming Using Internet +Sockets `_. + +Linux networking +================ + +The Linux kernel provides three basic structures for working with network +packets: :c:type:`struct socket`, :c:type:`struct sock` and :c:type:`struct +sk_buff`. + +The first two are abstractions of a socket: + + * :c:type:`struct socket` is an abstraction very close to user space, ie `BSD + sockets `_ used to program + network applications; + * :c:type:`struct sock` or *INET socket* in Linux terminology is the network + representation of a socket. + +The two structures are related: the :c:type:`struct socket` contains an INET +socket field, and the :c:type:`struct sock` has a BSD socket that holds it. + +The :c:type:`struct sk_buff` structure is the representation of a network packet +and its status. The structure is created when a kernel packet is received, +either from the user space or from the network interface. 
+ +The :c:type:`struct socket` structure +------------------------------------- + +The :c:type:`struct socket` structure is the kernel representation of a BSD +socket, the operations that can be executed on it are similar to those offered +by the kernel (through system calls). Common operations with sockets +(creation, initialization/bind, closing, etc.) result in specific system +calls; they work with the :c:type:`struct socket` structure. + +The :c:type:`struct socket` operations are described in :file:`net/socket.c` and +are independent of the protocol type. The :c:type:`struct socket` structure is thus +a generic interface over particular network operations implementations. +Typically, the names of these operations begin with the ``sock_`` prefix. + +.. _SocketStructOps: + +Operations on the socket structure +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Socket operations are: + +Creation +"""""""" + +Creation is similar to calling the :c:func:`socket` function in user space, but the +:c:type:`struct socket` created will be stored in the ``res`` parameter: + + * ``int sock_create(int family, int type, int protocol, struct socket **res)`` + creates a socket after the :c:func:`socket` system call; + * ``int sock_create_kern(struct net *net, int family, int type, int protocol, + struct socket **res)`` creates a kernel socket; + * ``int sock_create_lite(int family, int type, int protocol, struct socket **res)`` + creates a kernel socket without parameter sanity checks. 
+ +The parameters of these calls are as follows: + + * ``net``, where it is present, used as reference to the network namespace used; + we will usually initialize it with ``init_net``; + * ``family`` represents the family of protocols used in the transfer of + information; they usually begin with the ``PF_`` (Protocol Family) string; + the constants representing the family of protocols used are found in + :file:`linux/socket.h`, of which the most commonly used is ``PF_INET``, for + TCP/IP protocols; + * ``type`` is the type of socket; the constants used for this parameter are + found in :file:`linux/net.h`, of which the most used are ``SOCK_STREAM`` for + a connection based source-to-destination communication and ``SOCK_DGRAM`` + for connectionless communication; + * ``protocol`` represents the protocol used and is closely related to the + ``type`` parameter; the constants used for this parameter are found in + :file:`linux/in.h`, of which the most used are ``IPPROTO_TCP`` for TCP and + ``IPPROTO_UDP`` for UDP. + +To create a TCP socket in kernel space, you must call: + +.. code-block:: c + + struct socket *sock; + int err; + + err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + /* handle error */ + } + +and for creating UDP sockets: + +.. code-block:: c + + struct socket *sock; + int err; + + err = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { + /* handle error */ + } + +A usage sample is part of the :c:func:`sys_socket` system call handler: + +.. code-block:: c + + SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) + { + int retval; + struct socket *sock; + int flags; + + /* Check the SOCK_* constants for consistency. 
*/ + BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); + + flags = type & ~SOCK_TYPE_MASK; + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + type &= SOCK_TYPE_MASK; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; + + retval = sock_create(family, type, protocol, &sock); + if (retval < 0) + goto out; + + return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); + } + +Closing +""""""" + +Close connection (for sockets using connection) and release associated +resources: + + * ``void sock_release(struct socket *sock)`` calls the ``release`` function in + the ``ops`` field of the socket structure: + +.. code-block:: c + + void sock_release(struct socket *sock) + { + if (sock->ops) { + struct module *owner = sock->ops->owner; + + sock->ops->release(sock); + sock->ops = NULL; + module_put(owner); + } + //... + } + +Sending/receiving messages +"""""""""""""""""""""""""" + +The messages are sent/received using the following functions: + + * ``int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags);`` + * ``int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags);`` + * ``int sock_sendmsg(struct socket *sock, struct msghdr *msg);`` + * ``int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size);`` + +The message sending/receiving functions will then call the ``sendmsg``/ +``recvmsg`` function in the ``ops`` field of the socket. Functions +containing ``kernel_`` as a prefix are used when the socket is used in the +kernel. + +The parameters are: + + * ``msg``, a :c:type:`struct msghdr` structure, containing the message to be + sent/received. 
Among the important components of this structure are ``msg_name`` + and ``msg_namelen``, which, for UDP sockets, must be filled in with the address + to which the message is sent (:c:type:`struct sockaddr_in`); + * ``vec``, a :c:type:`struct kvec` structure, containing a pointer to the buffer + containing its data and size; as can be seen, it has a similar structure to the + :c:type:`struct iovec` structure (the :c:type:`struct iovec` structure + corresponds to the user space data, and the :c:type:`struct kvec` structure + corresponds to kernel space data). + +A usage example can be seen in the :c:func:`sys_sendto` system call handler: + +.. code-block:: c + + SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, + unsigned int, flags, struct sockaddr __user *, addr, + int, addr_len) + { + struct socket *sock; + struct sockaddr_storage address; + int err; + struct msghdr msg; + struct iovec iov; + int fput_needed; + + err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter); + if (unlikely(err)) + return err; + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + goto out; + + msg.msg_name = NULL; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; + if (addr) { + err = move_addr_to_kernel(addr, addr_len, &address); + if (err < 0) + goto out_put; + msg.msg_name = (struct sockaddr *)&address; + msg.msg_namelen = addr_len; + } + if (sock->file->f_flags & O_NONBLOCK) + flags |= MSG_DONTWAIT; + msg.msg_flags = flags; + err = sock_sendmsg(sock, &msg); + + out_put: + fput_light(sock->file, fput_needed); + out: + return err; + } + +The :c:type:`struct socket` fields +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
code-block:: c + + /** + * struct socket - general BSD socket + * @state: socket state (%SS_CONNECTED, etc) + * @type: socket type (%SOCK_STREAM, etc) + * @flags: socket flags (%SOCK_NOSPACE, etc) + * @ops: protocol specific socket operations + * @file: File back pointer for gc + * @sk: internal networking protocol agnostic socket representation + * @wq: wait queue for several uses + */ + struct socket { + socket_state state; + + short type; + + unsigned long flags; + + struct socket_wq __rcu *wq; + + struct file *file; + struct sock *sk; + const struct proto_ops *ops; + }; + +The noteworthy fields are: + + * ``ops`` - the structure that stores pointers to protocol-specific functions; + * ``sk`` - The ``INET socket`` associated with it. + +The :c:type:`struct proto_ops` structure +"""""""""""""""""""""""""""""""""""""""" + +The :c:type:`struct proto_ops` structure contains the implementations of the specific +operations implemented (TCP, UDP, etc.); these functions will be called from +generic functions through :c:type:`struct socket` (:c:func:`sock_release`, +:c:func:`sock_sendmsg`, etc.) + +The :c:type:`struct proto_ops` structure therefore contains a number of function +pointers for specific protocol implementations: + +.. code-block:: c + + struct proto_ops { + int family; + struct module *owner; + int (*release) (struct socket *sock); + int (*bind) (struct socket *sock, + struct sockaddr *myaddr, + int sockaddr_len); + int (*connect) (struct socket *sock, + struct sockaddr *vaddr, + int sockaddr_len, int flags); + int (*socketpair)(struct socket *sock1, + struct socket *sock2); + int (*accept) (struct socket *sock, + struct socket *newsock, int flags, bool kern); + int (*getname) (struct socket *sock, + struct sockaddr *addr, + int peer); + //... 
+ } + +The initialization of the ``ops`` field from :c:type:`struct socket` is done in +the :c:func:`__sock_create` function, by calling the :c:func:`create` function, +specific to each protocol; the corresponding call in the implementation of the +:c:func:`__sock_create` function is: + +.. code-block:: c + + //... + err = pf->create(net, sock, protocol, kern); + if (err < 0) + goto out_module_put; + //... + +This will instantiate the function pointers with calls specific to the protocol +type associated with the socket. The :c:func:`sock_register` and +:c:func:`sock_unregister` calls are used to fill the ``net_families`` vector. + +For the rest of the socket operations (other than creating, closing, and +sending/receiving a message as described above in the `Operations on the socket +structure`_ section), the functions referenced by the pointers in this structure will be +called. For example, for ``bind``, which associates a socket with an address and port on +the local machine, we will have the following code sequence: + +.. code-block:: c + + #define MY_PORT 60000 + + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons (MY_PORT), + .sin_addr = { htonl (INADDR_LOOPBACK) } + }; + + //... + err = sock->ops->bind (sock, (struct sockaddr *) &addr, sizeof(addr)); + if (err < 0) { + /* handle error */ + } + //... + +As you can see, for transmitting the address and port information that +will be associated with the socket, a :c:type:`struct sockaddr_in` is filled. + +The :c:type:`struct sock` structure +----------------------------------- + +The :c:type:`struct sock` describes an ``INET`` socket. Such a structure is +associated with a user space socket and implicitly with a :c:type:`struct +socket` structure. The structure is used to store information about the status +of a connection. The structure's fields and associated operations usually begin +with the ``sk_`` string. Some fields are listed below: + +.. code-block:: c + + struct sock { + //...
+ unsigned int sk_padding : 1, + sk_no_check_tx : 1, + sk_no_check_rx : 1, + sk_userlocks : 4, + sk_protocol : 8, + sk_type : 16; + //... + struct socket *sk_socket; + //... + struct sk_buff *sk_send_head; + //... + void (*sk_state_change)(struct sock *sk); + void (*sk_data_ready)(struct sock *sk); + void (*sk_write_space)(struct sock *sk); + void (*sk_error_report)(struct sock *sk); + int (*sk_backlog_rcv)(struct sock *sk, + struct sk_buff *skb); + void (*sk_destruct)(struct sock *sk); + }; + +\ + + * ``sk_protocol`` is the type of protocol used by the socket; + * ``sk_type`` is the socket type (``SOCK_STREAM``, ``SOCK_DGRAM``, etc.); + * ``sk_socket`` is the BSD socket that holds it; + * ``sk_send_head`` is the list of :c:type:`struct sk_buff` structures for + transmission; + * the function pointers at the end are callbacks for different situations. + +Initializing the :c:type:`struct sock` and attaching it to a BSD socket is done +using the callback created from ``net_families`` (called +:c:func:`__sock_create`). Here's how to initialize the :c:type:`struct sock` +structure for the IP protocol, in the :c:func:`inet_create` function: + +.. code-block:: c + + /* + * Create an inet socket. + */ + + static int inet_create(struct net *net, struct socket *sock, int protocol, + int kern) + { + + struct sock *sk; + + //... + err = -ENOBUFS; + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); + if (!sk) + goto out; + + err = 0; + if (INET_PROTOSW_REUSE & answer_flags) + sk->sk_reuse = SK_CAN_REUSE; + + + //... + sock_init_data(sock, sk); + + sk->sk_destruct = inet_sock_destruct; + sk->sk_protocol = protocol; + sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + //... + } + +.. _StructSKBuff: + +The :c:type:`struct sk_buff` structure +-------------------------------------- + +The :c:type:`struct sk_buff` (socket buffer) describes a network packet. 
The +structure fields contain information about both the header and packet contents, +the protocols used, the network device used, and pointers to the other +:c:type:`struct sk_buff`. A summary description of the content of the structure +is presented below: + +.. code-block:: c + + struct sk_buff { + union { + struct { + /* These two members must be first. */ + struct sk_buff *next; + struct sk_buff *prev; + + union { + struct net_device *dev; + /* Some protocols might use this space to store information, + * while device pointer would be NULL. + * UDP receive path is one user. + */ + unsigned long dev_scratch; + }; + }; + + struct rb_node rbnode; /* used in netem & tcp stack */ + }; + struct sock *sk; + + union { + ktime_t tstamp; + u64 skb_mstamp; + }; + + /* + * This is the control buffer. It is free to use for every + * layer. Please put your private variables there. If you + * want to keep them across layers you have to do a skb_clone() + * first. This is owned by whoever has the skb queued ATM. + */ + char cb[48] __aligned(8); + + unsigned long _skb_refdst; + void (*destructor)(struct sk_buff *skb); + union { + struct { + unsigned long _skb_refdst; + void (*destructor)(struct sk_buff *skb); + }; + struct list_head tcp_tsorted_anchor; + }; + /* ... */ + + unsigned int len, + data_len; + __u16 mac_len, + hdr_len; + + /* ... */ + + __be16 protocol; + __u16 transport_header; + __u16 network_header; + __u16 mac_header; + + /* private: */ + __u32 headers_end[0]; + /* public: */ + + /* These elements must be at the end, see alloc_skb() for details. 
*/ + sk_buff_data_t tail; + sk_buff_data_t end; + unsigned char *head, + *data; + unsigned int truesize; + refcount_t users; + }; + +where: + + * ``next`` and ``prev`` are pointers to the next, and previous element in the + buffer list; + * ``dev`` is the device which sends or receives the buffer; + * ``sk`` is the socket associated with the buffer; + * ``destructor`` is the callback that deallocates the buffer; + * ``transport_header``, ``network_header``, and ``mac_header`` are offsets + between the beginning of the packet and the beginning of the various headers + in the packets. They are internally maintained by the various processing + layers through which the packet passes. To get pointers to the headers, use + one of the following functions: :c:func:`tcp_hdr`, :c:func:`udp_hdr`, + :c:func:`ip_hdr`, etc. In principle, each protocol provides a function to + get a reference to the header of that protocol within a received packet. + Keep in mind that the ``network_header`` field is not set until the packet + reaches the network layer and the ``transport_header`` field is not set + until the packet reaches the transport layer. + +The structure of an `IP header `_ +(:c:type:`struct iphdr`) has the following fields: + +.. code-block:: c + + struct iphdr { + #if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 ihl:4, + version:4; + #elif defined (__BIG_ENDIAN_BITFIELD) + __u8 version:4, + ihl:4; + #else + #error "Please fix " + #endif + __u8 tos; + __be16 tot_len; + __be16 id; + __be16 frag_off; + __u8 ttl; + __u8 protocol; + __sum16 check; + __be32 saddr; + __be32 daddr; + /*The options start here. */ + }; + +where: + + * ``protocol`` is the transport layer protocol used; + * ``saddr`` is the source IP address; + * ``daddr`` is the destination IP address. + +The structure of a `TCP header +`_ +(:c:type:`struct tcphdr`) has the following fields: + +.. 
code-block:: c + + struct tcphdr { + __be16 source; + __be16 dest; + __be32 seq; + __be32 ack_seq; + #if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 res1:4, + doff:4, + fin:1, + syn:1, + rst:1, + psh:1, + ack:1, + urg:1, + ece:1, + cwr:1; + #elif defined(__BIG_ENDIAN_BITFIELD) + __u16 doff:4, + res1:4, + cwr:1, + ece:1, + urg:1, + ack:1, + psh:1, + rst:1, + syn:1, + fin:1; + #else + #error "Adjust your defines" + #endif + __be16 window; + __sum16 check; + __be16 urg_ptr; + }; + +where: + + * ``source`` is the source port; + * ``dest`` is the destination port; + * ``syn``, ``ack``, ``fin`` are the TCP flags used; for a more detailed view, + see this `diagram + `_. + +The structure of a `UDP header +`_ +(:c:type:`struct udphdr`) has the following fields: + +.. code-block:: c + + struct udphdr { + __be16 source; + __be16 dest; + __be16 len; + __sum16 check; + }; + +where: + + * ``source`` is the source port; + * ``dest`` is the destination port. + +An example of accessing the information present in the headers of a network +packet is as follows: + +.. code-block:: c + + struct sk_buff *skb; + + struct iphdr *iph = ip_hdr(skb); /* IP header */ + /* iph->saddr - source IP address */ + /* iph->daddr - destination IP address */ + if (iph->protocol == IPPROTO_TCP) { /* TCP protocol */ + struct tcphdr *tcph = tcp_hdr(skb); /* TCP header */ + /* tcph->source - source TCP port */ + /* tcph->dest - destination TCP port */ + } else if (iph->protocol == IPPROTO_UDP) { /* UDP protocol */ + struct udphdr *udph = udp_hdr(skb); /* UDP header */ + /* udph->source - source UDP port */ + /* udph->dest - destination UDP port */ + } + +.. _Conversions: + +Conversions +=========== + +In different systems, there are several ways of ordering bytes in a word +(`Endianness `_), including: `Big +Endian `_ (the most +significant byte first) and `Little +Endian `_ (the least +significant byte first). 
Since a network interconnects systems with different +platforms, the Internet has imposed a standard sequence for the storage of +numerical data, called `network byte-order +`_. In +contrast, the byte sequence for the representation of numerical data on the host +computer is called host byte-order. Data received/sent from/to the network is in +the network byte-order format and should be converted between this format and +the host byte-order. + +For converting we use the following macros: + + * ``u16 htons(u16 x)`` converts a 16 bit integer from host byte-order to + network byte-order (host to network short); + * ``u32 htonl(u32 x)`` converts a 32 bit integer from host byte-order to + network byte-order (host to network long); + * ``u16 ntohs(u16 x)`` converts a 16 bit integer from network byte-order to + host byte-order (network to host short); + * ``u32 ntohl(u32 x)`` converts a 32 bit integer from network byte-order to + host byte-order (network to host long). + +.. _netfilter: + +netfilter +========= + +Netfilter is the name of the kernel interface for capturing network packets for +modifying/analyzing them (for filtering, NAT, etc.). `The netfilter +`_ interface is used in user space by `iptables +`_. + +In the Linux kernel, packet capture using netfilter is done by attaching hooks. +Hooks can be specified in different locations in the path followed by a kernel +network packet, as needed. An organization chart with the route followed by a +package and the possible areas for a hook can be found `here +`_. + +The header included when using netfilter is :file:`linux/netfilter.h`. + +A hook is defined through the :c:type:`struct nf_hook_ops` structure: + +.. code-block:: c + + struct nf_hook_ops { + /* User fills in from here down. */ + nf_hookfn *hook; + struct net_device *dev; + void *priv; + u_int8_t pf; + unsigned int hooknum; + /* Hooks are ordered in ascending priority. 
*/ + int priority; + }; + +where: + + * ``pf`` is the protocol family (``PF_INET``, etc.); + * ``priority`` is the priority; priorities are defined in + :file:`uapi/linux/netfilter_ipv4.h` as follows: + +.. code-block:: c + + enum nf_ip_hook_priorities { + NF_IP_PRI_FIRST = INT_MIN, + NF_IP_PRI_CONNTRACK_DEFRAG = -400, + NF_IP_PRI_RAW = -300, + NF_IP_PRI_SELINUX_FIRST = -225, + NF_IP_PRI_CONNTRACK = -200, + NF_IP_PRI_MANGLE = -150, + NF_IP_PRI_NAT_DST = -100, + NF_IP_PRI_FILTER = 0, + NF_IP_PRI_SECURITY = 50, + NF_IP_PRI_NAT_SRC = 100, + NF_IP_PRI_SELINUX_LAST = 225, + NF_IP_PRI_CONNTRACK_HELPER = 300, + NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, + NF_IP_PRI_LAST = INT_MAX, + }; + +\ + + + * ``dev`` is the device (network interface) on which the capture is + intended; + + + * ``hooknum`` is the type of hook used. When a packet is captured, the + processing mode is defined by the ``hooknum`` and ``hook`` fields. For IP, + hook types are defined in :file:`linux/netfilter.h`: + +.. code-block:: c + + enum nf_inet_hooks { + NF_INET_PRE_ROUTING, + NF_INET_LOCAL_IN, + NF_INET_FORWARD, + NF_INET_LOCAL_OUT, + NF_INET_POST_ROUTING, + NF_INET_NUMHOOKS + }; + +\ + + * ``hook`` is the handler called when capturing a network packet (packet sent + as a :c:type:`struct sk_buff` structure). The ``priv`` field is private information + handed to the handler. The capture handler prototype is defined by the + :c:type:`nf_hookfn` type: + +.. code-block:: c + + struct nf_hook_state { + unsigned int hook; + u_int8_t pf; + struct net_device *in; + struct net_device *out; + struct sock *sk; + struct net *net; + int (*okfn)(struct net *, struct sock *, struct sk_buff *); + }; + + typedef unsigned int nf_hookfn(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); + +For the :c:func:`nf_hookfn` capture function, the ``priv`` parameter is the +private information with which the :c:type:`struct nf_hook_ops` was +initialized. ``skb`` is the pointer to the captured network packet.
Based on +``skb`` information, packet filtering decisions are made. The function's +``state`` parameter is the status information related to the packet capture, +including the input interface, the output interface, the priority, the hook +number. Priority and hook number are useful for allowing the same function to +be called by several hooks. + +A capture handler can return one of the constants ``NF_*``: + +.. code-block:: c + + /* Responses from hook functions. */ + #define NF_DROP 0 + #define NF_ACCEPT 1 + #define NF_STOLEN 2 + #define NF_QUEUE 3 + #define NF_REPEAT 4 + #define NF_STOP 5 + #define NF_MAX_VERDICT NF_STOP + +``NF_DROP`` is used to filter (ignore) a packet, and ``NF_ACCEPT`` is used to +accept a packet and forward it. + +Registering/unregistering a hook is done using the functions defined in +:file:`linux/netfilter.h`: + +.. code-block:: c + + /* Function to register/unregister hook points. */ + int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops); + void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops); + int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, + unsigned int n); + void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, + unsigned int n); + + +.. attention:: + + Prior to version 3.11-rc2 of the Linux kernel, + there are some restrictions related to the use of header extraction functions + from a :c:type:`struct sk_buff` structure set as a parameter in a netfilter + hook. While the IP header can be obtained each time using :c:func:`ip_hdr`, + the TCP and UDP headers can be obtained with :c:func:`tcp_hdr` and + :c:func:`udp_hdr` only for packets that come from inside the system rather + than the ones that are received from outside the system. In the latter case, + you must manually calculate the header offset in the packet: + + ..
code-block:: c + + // For TCP packets (iph->protocol == IPPROTO_TCP) + tcph = (struct tcphdr*)((__u32*)iph + iph->ihl); + // For UDP packets (iph->protocol == IPPROTO_UDP) + udph = (struct udphdr*)((__u32*)iph + iph->ihl); + + This code works in all filtering situations, so it's recommended to use it + instead of header access functions. + +A usage example for a netfilter hook is shown below: + +.. code-block:: c + + #include + #include + #include + #include + #include + #include + #include + + static unsigned int my_nf_hookfn(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) + { + /* process packet */ + //... + + return NF_ACCEPT; + } + + static struct nf_hook_ops my_nfho = { + .hook = my_nf_hookfn, + .hooknum = NF_INET_LOCAL_OUT, + .pf = PF_INET, + .priority = NF_IP_PRI_FIRST + }; + + int __init my_hook_init(void) + { + return nf_register_net_hook(&init_net, &my_nfho); + } + + void __exit my_hook_exit(void) + { + nf_unregister_net_hook(&init_net, &my_nfho); + } + + module_init(my_hook_init); + module_exit(my_hook_exit); + +netcat +====== + +When developing applications that include networking code, one of the most +used tools is netcat, also nicknamed the "Swiss-army knife for TCP/IP". It allows: + + * Initiating TCP connections; + * Waiting for a TCP connection; + * Sending and receiving UDP packets; + * Displaying traffic in hexdump format; + * Running a program after establishing a connection (e.g. a shell); + * Setting special options in sent packets. + +Initiating TCP connections: + +.. code-block:: console + + nc hostname port + +Listening to a TCP port: + +.. code-block:: console + + nc -l -p port + +Sending and receiving UDP packets is done by adding the ``-u`` command line option. + +.. note:: + + The command is :command:`nc`; often :command:`netcat` is an alias for this + command. There are other implementations of the netcat command, some of which + have slightly different parameters than the classic implementation.
Run + :command:`man nc` or :command:`nc -h` to check how to use it. + +For more information on netcat, check the following `tutorial +`_. + +Further reading +=============== + +#. Understanding Linux Network Internals +#. `Linux IP networking`_ +#. `The TUX Web Server`_ +#. `Beej's Guide to Network Programming Using Internet Sockets`_ +#. `Kernel Korner - Network Programming in the Kernel`_ +#. `Hacking the Linux Kernel Network Stack`_ +#. `The netfilter.org project`_ +#. `A Deep Dive Into Iptables and Netfilter Architecture`_ +#. `Linux Foundation Networking Page`_ + +.. _Linux IP networking: http://www.cs.unh.edu/cnrg/gherrin/ +.. _The TUX Web Server: http://www.stllinux.org/meeting_notes/2001/0719/myTUX/ +.. _Beej's Guide to Network Programming Using Internet Sockets: https://www.beej.us/guide/bgnet/ +.. _Kernel Korner - Network Programming in the Kernel: http://www.linuxjournal.com/article/7660 +.. _Hacking the Linux Kernel Network Stack: http://phrack.org/issues/61/13.html +.. _The netfilter.org project: http://www.netfilter.org/ +.. _A Deep Dive Into Iptables and Netfilter Architecture: https://www.digitalocean.com/community/tutorials/a-deep-dive-into-iptables-and-netfilter-architecture +.. _Linux Foundation Networking Page: http://www.linuxfoundation.org/en/Net:Main_Page + +Exercises +========= + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: networking + +.. important:: + + You need to make sure that the ``netfilter`` support is active in kernel. It + is enabled via ``CONFIG_NETFILTER``. To activate it, run :command:`make menuconfig` in + the :file:`linux` directory and check the ``Network packet filtering framework + (Netfilter)`` option in ``Networking support -> Networking options``. If it + was not enabled, enable it (as builtin, not external module - it must be + marked with ``*``). + + +1. 
Displaying packets in kernel space +------------------------------------- + +Write a kernel module that displays the source address and port for TCP packets +that initiate an outbound connection. Start from the code in +:file:`1-2-netfilter` and fill in the areas marked with ``TODO 1``, taking into +account the comments below. + +You will need to register a netfilter hook of type ``NF_INET_LOCAL_OUT`` as explained +in the `netfilter`_ section. + +`The struct sk_buff structure`_ lets you access the packet headers using +specific functions. The :c:func:`ip_hdr` function returns the IP header as a +pointer to a :c:type:`struct iphdr` structure. The :c:func:`tcp_hdr` function +returns the TCP header as a pointer to a :c:type:`struct tcphdr` structure. + +The `diagram`_ explains how to make a TCP connection. The connection initiation +packet has the ``SYN`` flag set in the TCP header and the ``ACK`` flag cleared. + +.. note:: + + To display the source IP address, use the ``%pI4`` format of the printk + function. Details can be found in the `kernel documentation + `_ (``IPv4 + addresses`` section). The following is an example code snippet that uses + ``%pI4``: + + .. code-block:: c + + printk("IP address is %pI4\n", &iph->saddr); + + When using the ``%pI4`` format, the argument to printk is a pointer. Hence the + construction ``&iph->saddr`` (with operator & - ampersand) instead of + ``iph->saddr``. + +The source TCP port is, in the TCP header, in the `network byte-order`_ format. +Read through the :ref:`Conversions` section. Use :c:func:`ntohs` to convert. + +For testing, use the :file:`1-2-netfilter/user/test-1.sh` file. The test creates +a connection to the localhost, a connection that will be intercepted and +displayed by the kernel module. The script is copied on the virtual machine by +the :command:`make copy` command only if it is marked as executable. 
The script +uses the statically compiled :command:`netcat` tool stored in +:file:`skels/networking/netcat`; this program must have execution +permissions. + +After running the checker the output should be similar to the one below: + +.. code-block:: console + + # ./test-1.sh + [ 229.783512] TCP connection initiated from 127.0.0.1:44716 + Should show up in filter. + Check dmesg output. + +2. Filtering by destination address +----------------------------------- + +Extend the module from exercise 1 so that you can specify a destination address +by means of a ``MY_IOCTL_FILTER_ADDRESS`` ioctl call. You'll only show packets +containing the specified destination address. To solve this task, fill in the +areas marked with ``TODO 2`` and follow the specifications below. + +To implement the ioctl routine, you must fill out the ``my_ioctl`` function. +Review the section in :ref:`ioctl`. The address sent from user space is in +`network byte-order`_, so there will be **NO need** for conversion. + +.. note:: + + The IP address sent via ``ioctl`` is passed by address (pointer), not by value. The + address must be stored in the ``ioctl_set_addr`` variable. For copying use + :c:func:`copy_from_user`. + +To compare the addresses, fill out the ``test_daddr`` function. Addresses in +network byte-order will be used without having to convert addresses (if they +are equal from left to right they will be equal if reversed too). + +The ``test_daddr`` function must be called from the netfilter hook to display +the connection initialization packets for which the destination address is the +one sent through the ioctl routine. The connection initiation packet has the +``SYN`` flag set in the TCP header and the ``ACK`` flag cleared. You have to +check two things: + + * the TCP flags; + * the destination address of the packet (using ``test_daddr``). + +For testing, use the :file:`1-2-netfilter/user/test-2.sh` script. 
This script +needs the :file:`1-2-netfilter/user/test.c` file to be compiled into the test +executable. Compilation is done automatically on the physical system when +running the :command:`make build` command. The test script is copied to the +virtual machine only if it is marked as executable. The script uses the +statically compiled :command:`netcat` tool in :file:`skels/networking/netcat`; +this executable must have execution permissions. + +After running the checker the output should be similar to the one below: + +.. code-block:: console + + # ./test-2.sh + [ 797.673535] TCP connection initiated from 127.0.0.1:44721 + Should show up in filter. + Should NOT show up in filter. + Check dmesg output. + +The test asks for packet filtering first for the ``127.0.0.1`` IP address and +then for the ``127.0.0.2`` IP address. The first connection initiation packet +(to ``127.0.0.1``) is intercepted and displayed by the filter, while the second +(to ``127.0.0.2``) is not intercepted. + +3. Listening on a TCP socket +---------------------------- + +Write a kernel module that creates a TCP socket that listens to connections on +port ``60000`` on the loopback interface (in ``init_module``). Start from the +code in :file:`3-4-tcp-sock` and fill in the areas marked with ``TODO 1`` taking +into account the observations below. + +Read the `Operations on the socket structure`_ and `The struct proto_ops +structure`_ sections. + +The ``sock`` socket is a ``server socket`` and must be put in the listening +state. That is, the ``bind`` and ``listen`` operations must be applied to the +socket. For the ``bind`` and ``listen`` equivalent, in kernel space you will +need to call ``sock->ops->...;`` examples of such functions you can call are +``sock->ops->bind``, ``sock->ops->listen`` etc. + +.. note:: + + For examples of calling the ``sock->ops->bind`` or ``sock->ops->listen`` functions, see + how they are called in the :c:func:`sys_bind` and :c:func:`sys_listen` system + call handlers. 
+ + Look for the system call handlers in the ``net/socket.c`` file in the Linux + kernel source code tree. + +.. note:: + + For the second argument of the ``listen`` (backlog) call, use the + ``LISTEN_BACKLOG``. + +Remember to release the socket in the module's exit function and in the area +marked with error labels; use :c:func:`sock_release`. + +For testing, run the :file:`3-4-tcp-sock/test-3.sh` script. The script is +copied on the virtual machine by :command:`make copy` only if it is marked as +executable. + +After running the test, a TCP socket listening for connections on port +``60000`` will be displayed. + +4. Accepting connections in kernel space +---------------------------------------- + +Expand the module from the previous exercise to allow an external connection (no +need to send any message, only accept new connections). Fill in the areas marked +with ``TODO 2``. + +Read the `Operations on the socket structure`_ and `The struct proto_ops +structure`_ sections. + +For the kernel space ``accept`` equivalent, see the system call handler for +:c:func:`sys_accept4`. Follow the `lnet_sock_accept +`_ +implementation, and how the ``sock->ops->accept`` call is used. Use ``0`` as +the value for the second to last argument (``flags``), and ``true`` for the +last argument (``kern``). + +.. note:: + + Look for the system call handlers in the ``net/socket.c`` file in the Linux + kernel source code tree. + +.. note:: + + The new socket (``new_sock``) must be created with the + :c:func:`sock_create_lite` function and then its operations must be configured + using + + .. code-block:: c + + new_sock->ops = sock->ops; + +Print the address and port of the destination socket. To find the peer name of a +socket (its address), refer to the :c:func:`sys_getpeername` system call handler. + +.. note:: + + The first argument for the ``sock->ops->getname`` function will be the + connection socket, i.e. ``new_sock``, the one initialized by the ``accept`` + call. 
+ + The last argument of the ``sock->ops->getname`` function will be ``1``, + meaning that we want to know about the endpoint or the peer (*remote end* or + *peer*). + + Display the peer address (indicated by the ``raddr`` variable) using the + ``print_sock_address`` macro defined in the file. + +Release the newly created socket (after accepting the connection) in the module +exit function and after the error label. After adding the ``accept`` code to the +module initialization function, the :command:`insmod` operation will lock until +a connection is established. You can unlock using :command:`netcat` on that +port. Consequently, the test script from the previous exercise will not work. + +For testing, run the :file:`3-4-tcp_sock/test-4.sh` script. The script is copied on +the virtual machine by :command:`make copy` only if it is marked as executable. + +Nothing special will be displayed (in the kernel buffer). The success of the +test will be defined by the connection establishment. Then use ``Ctrl+c`` to +stop the test script, and then you can remove the kernel module. + +5. UDP socket sender +-------------------- + +Write a kernel module that creates a UDP socket and sends the message from the +``MY_TEST_MESSAGE`` macro on the socket to the loopback address on port +``60001``. + +Start from the code in :file:`5-udp-sock`. + +Read the `Operations on the socket structure`_ and `The struct proto_ops +structure`_ sections. + +To see how to send messages in the kernel space, see the :c:func:`sys_send` +system call handler or `Sending/receiving messages`_. + +.. hint:: + + The ``msg_name`` field of the :c:type:`struct msghdr` structure must be + initialized to the destination address (pointer to :c:type:`struct sockaddr`) + and the ``msg_namelen`` field to the address size. + + Initialize the ``msg_flags`` field of the :c:type:`struct msghdr` structure + to ``0``. 
+ + Initialize the ``msg_control`` and ``msg_controllen`` fields of the + :c:type:`struct msghdr` structure to ``NULL`` and ``0`` respectively. + +For sending the message use :c:func:`kernel_sendmsg`. + +The message transmission parameters are retrieved from the kernel space. Cast +the :c:type:`struct iovec` structure pointer to a :c:type:`struct kvec` pointer +in the :c:func:`kernel_sendmsg` call. + +.. hint:: + + The last two parameters of :c:func:`kernel_sendmsg` are ``1`` (number of I/O + vectors) and ``len`` (message size). + +For testing, use the :file:`test-5.sh` file. The script is copied on the virtual +machine by the :command:`make copy` command only if it is marked as executable. +The script uses the statically compiled ``netcat`` tool stored in +:file:`skels/networking/netcat`; this executable must have execution +permissions. + +For a correct implementation, running the :file:`test-5.sh` script will cause +the ``kernelsocket`` message to be displayed like in the output below: + +.. code-block:: console + + /root # ./test-5.sh + + pid=1059 + + sleep 1 + + nc -l -u -p 60001 + + insmod udp_sock.ko + kernelsocket + + rmmod udp_sock + + kill 1059 diff --git a/Documentation/teaching/lectures/address-space.rst b/Documentation/teaching/lectures/address-space.rst new file mode 100644 index 00000000000000..2150cbe91dc2ec --- /dev/null +++ b/Documentation/teaching/lectures/address-space.rst @@ -0,0 +1,1080 @@ +============= +Address Space +============= + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. slide:: Address Space + :inline-contents: True + :level: 2 + + * x86 MMU + + * Segmentation + + * Paging + + * TLB + + * Linux Address Space + + * User + + * Kernel + + * High memory + + +x86 MMU +======= + +The x86 MMU has a segmentation and a pagination unit. 
The segmentation +unit can be used to define logical memory segments defined by a +logical (virtual) start address, a base linear (mapped) address and a +size. A segment can also restrict access based on the access type +(read, execute, write) or the privilege level (we can define some +segments to be accessible only by kernel for example). + +When the CPU makes a memory access, it will use the segmentation unit +to translate the logical address to a linear address, based on the +information in the segment descriptor. + +If pagination is enabled the linear address will be further +transformed into a physical address, using the information from the +page tables. + +Note that the segmentation unit can not be disabled, so if the MMU has +been enabled, segmentation will always be used. + +.. slide:: x86 MMU + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + +--------------+ +------------+ + logical | | linear | | physical + ---------> | Segmentation | --------> | Paging | ----------> + address | Unit | address | Unit | address + | | | | + +--------------+ +------------+ + +Selectors +--------- + +A program can use multiple segments and in order to determine which +segment to use, special registers (named selectors) are used. The +basic selectors that are typically used are CS - "Code Selector", DS - +"Data Selector" and SS - "Stack Selector". + +Instruction fetches will by default use CS, while data access will by +default use DS unless the stack is used (e.g. data access through the +pop and push instructions) in which case SS will be used by default. + +Selectors have three main fields: the index, the table index and the +running privilege level: + + +.. slide:: Selectors + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + 15 3 2 1 0 + +------------+----+-----+ + | | | | + Segment selectors | index | TI | RPL | + (CS, DS, SS, ES, FS, GS) | | | | + +------------+----+-----+ + + .. 
ifslides:: + + * Selectors: CS, DS, SS, ES, FS, GS + + * Index: indexes the segment descriptor table + + * TI: selects either the GDT or LDT + + * RPL: for CS only indicates the running (current) privilege level + + * GDTR and LDTR registers point to the base of GDT and LDT + + +The index will be used to determine which entry of the descriptor +table should be used. `TI` is used to select either the Global +Descriptor Table (GDT) or the Local Descriptor Table (LDT). The tables +are effectively arrays that start at the location specified in the +special registers `GDTR` (for GDT) and `LDTR` (for LDT). + +.. note:: LDT was designed so that applications can define their own + particular segments. Although not many applications use this + feature, Linux (and Windows) provide system calls that + allow an application to create its own segments. + +`RPL` is only used for CS and it represents the current privilege +level. There are 4 privilege levels, the highest level being 0 (and +typically used by the kernel) and the lowest is 3 (and typically used +by user applications). + + +Segment descriptor +------------------ + +The CPU will use the `index` field of the selector to access an 8 byte +descriptor: + +.. slide:: Segment descriptor + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + 63 56 44 40 32 + +-------------------------------+---+---+---+---+---------------+---+---+---+---+---------------+-------------------------------+ + | | | D | | A | Segment | | D | | | | + | Base Address 31:24 | G | / | L | V | Limit | P | P | S | Type | Base Address 23:16 | + | | | B | | L | 19:16 | | L | | | | + +-------------------------------+---+---+---+---+---------------+---+---+---+---+---------------+-------------------------------+ + | | | + | Base address 15:0 | Segment Limit 15:0 | + | | | + +---------------------------------------------------------------+---------------------------------------------------------------+ + 31 15 0 + + + * Base: linear address for the start of the segment + + * Limit: size of the segment + + * G: granularity bit: if set the size is in 4K pages, otherwise in bytes + + * B/D: data/code + + * Type: code segment, data/stack, TSS, LDT, GDT + + * Protection: the minimum privilege level required to access the + segment (RPL is checked against DPL) + + +Some of the descriptor fields should be familiar. And that is because +there is some resemblance with Interrupt Descriptors we looked at +previously. + + +Segmentation in Linux +--------------------- + +In Linux, segments are not used to define the stack, code or +data. These will be set up using the paging unit as it allows better +granularity and more importantly it allows Linux to use a generic +approach that works on other architectures (that don't have +segmentation support). + +However, because the segmentation unit cannot be disabled Linux must +create 4 generic 0 - 4GB segments for: kernel code, kernel data, user +code and user data. + +Besides these, Linux uses segments for implementing Thread Local +Storage (TLS) together with the `set_thread_area` system call. + +It also uses the TSS segment in order to define the kernel stack to +use when a change of privilege (e.g. system call, interrupt while +running in user-space) occurs. + +.. 
slide:: Segmentation in Linux + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* + * The layout of the per-CPU GDT under Linux: + * + * 0 - null <=== cacheline #1 + * 1 - reserved + * 2 - reserved + * 3 - reserved + * + * 4 - unused <=== cacheline #2 + * 5 - unused + * + * ------- start of TLS (Thread-Local Storage) segments: + * + * 6 - TLS segment #1 [ glibc's TLS segment ] + * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] + * 8 - TLS segment #3 <=== cacheline #3 + * 9 - reserved + * 10 - reserved + * 11 - reserved + * + * ------- start of kernel segments: + * + * 12 - kernel code segment <=== cacheline #4 + * 13 - kernel data segment + * 14 - default user CS + * 15 - default user DS + * 16 - TSS <=== cacheline #5 + * 17 - LDT + * 18 - PNPBIOS support (16->32 gate) + * 19 - PNPBIOS support + * 20 - PNPBIOS support <=== cacheline #6 + * 21 - PNPBIOS support + * 22 - PNPBIOS support + * 23 - APM BIOS support + * 24 - APM BIOS support <=== cacheline #7 + * 25 - APM BIOS support + * + * 26 - ESPFIX small SS + * 27 - per-cpu [ offset to per-cpu data area ] + * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8 + * 29 - unused + * 30 - unused + * 31 - TSS for double fault handler + */ + + DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { + #ifdef CONFIG_X86_64 + /* + * We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + * + * TLS descriptors are currently at a different place compared to i386. + * Hopefully nobody expects them at a fixed place (Wine?) 
+ */ + [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), + #else + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), + [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + /* 32-bit code */ + [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), + /* 16-bit code */ + [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. + */ + /* 32-bit code */ + [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), + /* 16-bit code */ + [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), + /* data */ + [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), + + [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), + [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), + GDT_STACK_CANARY_INIT + #endif + } }; + EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + + +Inspecting selectors and segments +--------------------------------- + +.. 
slide:: Inspecting selectors and segments + :inline-contents: True + :level: 2 + + |_| + + .. asciicast:: ../res/selectors-and-segments.cast + + +x86 Paging +---------- + +The x86 paging unit support two types of paging: regular and extended paging. + +Regular paging has 2 levels and a fixed page size of 4KB. The linear +address is split in three fields: + +* Directory (the 10 most significant bits) + +* Table (the next 10 most bits) + +* Offset (the least significant 12 bits) + + +.. slide:: Regular paging + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + :--no-separation: + + Virtual Address + +------------+ +------------------+----------------+---------------+ + | CR3 | | DIRECTORY cEEE| TABLE cDDD | OFFSET cCCC| + +------------+ +------------------+----------------+---------------+ + | | | | + | | | | PAGE + | | | | /-----------------------\ + | | | | | | + | | | | | | + | | +-----------+ | +-----------------------+ + | | | +--->| Physical Address cCCC| + | | | +-----------------------+ + | +-----------------+ | | | + | | | PAGE | | + | | | TABLE | | + | | PAGE | /------------\ | | + | | DIRECTORY | | | | | + | | /------------\ | | | | | + | | | | | +------------+ +----> \-----------------------/ + | | | | +---->| cDDD |---+ + | | | | +------------+ + | | | | | | + | | | | | | + | | +------------+ | | + | +----->|cEEE |---+ | | + | +------------+ | | | + | | | +---->\------------/ + | | | + +--------->\------------/ + + +When extended paging is enabled, a single level is used and pages are +4MB. The linear address is split in two fields: + +* Directory (10 most significant bits) + +* Offset (least significant 22 bits) + +.. slide:: Extended paging + :inline-contents: True + :level: 2 + + .. 
ditaa:: + :--no-separation: + + Virtual Address + +------------+ +-------------------+-----------------------------+ + | CR3 | | DIRECTORY cEEE | OFFSET cDDD | + +------------+ +-------------------+-----------------------------+ + | | | + | | | PAGE + | | | /----------------------\ + | | | | | + | | | | | + | | | +----------------------+ + | | +--->| Physical Address cDDD| + | | +----------------------+ + | +-----------------+ | | + | | | | + | | | | + | | PAGE | | + | | DIRECTORY | | + | | /------------\ | | + | | | | +------------------>\----------------------/ + | | | | | + | | | | | + | | | | | + | | | | | + | | +------------+ | + | +----->| cEEE |-------------+ + | +------------+ + | | | + | | | + +---------->\------------/ + + +Page tables +------------ + +We can mix regular and extended paging, the directory page has a bit +that specifies if extended or regular paging should be used. The +special CR3 register points to the base of the page directory and page +directory entries point to the base of the page table. + +Both page directory and page table have 1024 entries and each entry +has 4 bytes. + +All tables are stored in memory and the page table addresses are +physical addresses. + + +.. slide:: Page tables + :inline-contents: False + :level: 2 + + * Both page directory and page table have 1024 entries + + * Each entry has 4 bytes + + * The special CR3 register point to the base of the page directory + + * Page directory entries points to the base of the page table + + * All tables are stored in memory + + * All table addresses are physical addresses + + +Page table entry fields: + +.. 
slide:: Page table entry fields + :inline-contents: True + :level: 2 + + * Present/Absent + + * PFN (Page Frame Number): the most 20 significant bits of the physical address + + * Accessed - not updated by hardware (can be used by OS for housekeeping) + + * Dirty - not updated by hardware (can be used by OS for housekeeping) + + * Access rights: Read/Write + + * Privilege: User/Supervisor + + * Page size - only for page directory; if set extended paging is used + + * PCD (page cache disable), PWT (page write through) + + +Linux paging +------------ + +Linux paging uses 4 levels in order to support 64bit +architectures. The diagram below shows how the various virtual address +chunks are used to index the page tables and compute the physical +address. + + +.. slide:: Linux paging + :inline-contents: True + :level: 2 + + .. ditaa:: + :--no-separation: + + Virtual Address + +------------+ +------------------+-----------------+------------------+-------------------+---------------+ + | CR3 | | GLOBAL DIR cEEE| UPPER DIR cDDD| MIDDLE DIR cCCC| TABLE cBBB| OFFSET cAAA | + +------------+ +------------------+-----------------+------------------+-------------------+---------------+ + | | | | | | + | | | | | | PAGE + | | | | | | /----------------------\ + | | | | | | | | + | | | | | | | | + | | +-----------+ | | PAGE GLOBAL | +----------------------+ + | | | | | DIRECTORY +-------->| Physical Address cAAA| + | | | | PAGE MIDDLE | /------------\ +----------------------+ + | +-----------------+ | | DIRECTORY | | | | | + | | | PAGE UPPER | /------------\ | | | | | + | | | DIRECTORY | | | | | | | | + | | PAGE GLOBAL | /------------\ | | | | | | | | + | | DIRECTORY | | | | +------------+ | | | | | + | | /------------\ | | | +--->| cCCC |---+ | +------------+ | | + | | | | | | | +------------+ | +--->| cBBB |---------->\----------------------/ + | | | | | | | | | | +------------+ + | | | | | +------------+ +----->\------------/ | | | + | | | | +---->| cDDD |---+ | | | + | | | | 
+------------+ +----->\------------/ + | | +------------+ | | + | +----->| cEEE |--+ | | + | +------------+ | | | + | | | +----->\------------/ + | | | + +--------->\------------/ + + +Linux has a common API for creating and walking page tables. Creating +and modifying address spaces for kernel and processes is done using +the same generic code which relies on macros and functions to +translate these generic operations into code that runs on different +architectures. + +Here is an example of how we can translate a virtual address to a +physical address, using the Linux page table APIs: + +.. slide:: Linux APIs for page table handling + :inline-contents: True + :level: 2 + + .. code-block:: c + + struct page *page; + pgd_t *pgd; + pmd_t *pmd; + pud_t *pud; + pte_t *pte; + void *laddr, *paddr; + + pgd = pgd_offset(mm, vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + pte = pte_offset(pmd, vaddr); + page = pte_page(*pte); + laddr = page_address(page); + paddr = virt_to_phys(laddr); + + +In order to support architectures with fewer than 4 levels of +pagination (such as for x86 32bits) some macros and / or functions are +0 / empty: + +.. slide:: What about platforms with fewer than 4 levels of pagination? + :inline-contents: True + :level: 2 + + .. code-block:: c + + static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) + { + return (pud_t *)pgd; + } + + static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) + { + return (pmd_t *)pud; + } + + +Translation Look-aside Buffer +----------------------------- + +When using virtual memory, due to the page table organization, we may +need an extra 1 (x86 extended paging), 2 (x86 regular paging) or 3 +(x86 64bit) memory access(es). + +A special cache, called Translation Look-aside Buffer (TLB) is used to +speed up translations from virtual address to physical addresses. + +The TLB has the following properties: + +.. 
slide:: Translation Look-aside Buffer + :inline-contents: True + :level: 2 + + * Caches paging information (PFN, rights, privilege) + + * Content Addressable Memory / Associative Memory + + * Very small (64-128 entries) + + * Very fast (single cycle due to parallel search implementation) + + * CPUs usually have two TLBs: i-TLB (code) and d-TLB (data) + + * TLB miss penalty: up to hundreds of cycles + + +As with other caches, we must be careful to not create consistency +issues. + +For example, when changing the mapping of one page to point to a +different physical memory location in the page tables, we must +invalidate the associated TLB entry. Otherwise, the MMU will do the +translation to the old physical address instead of the new physical +address. + +The x86 platform supports TLB invalidation through two types of +operations. + +.. slide:: TLB invalidation + :inline-contents: True + :level: 2 + + Single address invalidation: + + .. code-block:: asm + + mov $addr, %eax + invlpg (%eax) + + Full invalidation: + + .. code-block:: asm + + mov %cr3, %eax + mov %eax, %cr3 + + +Linux address space +=================== + +Address space options for 32bit systems +--------------------------------------- + +There are two main options for implementing kernel and user space: +either dedicated address spaces for each, or splitting a shared address +space. + +.. slide:: Address space options for 32bit systems + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + +-------------------+ +-------------------+ 0xFFFFFFFF +-------------------+ ^ + | | | | | | | + | | | | | | | Kernel space + | | | | | | | + | User | | Kernel | 0xC0000000 +-------------------+ v + | space | | space | | | ^ + | | | | | | | User space + | | | | | | | + | | | | | | | + | | | | | | | + | | | | | | | + | | | | | | | + | | | | | | | + | | | | | | | + +-------------------+ +-------------------+ 0x00000000 +-------------------+ v + + + (a) 4/4 split (b) 1/3 or 2/2 split + + +Each has advantages and disadvantages: + +.. slide:: Advantages and disadvantages + :inline-contents: True + :level: 2 + + * Disadvantages for dedicated kernel space: + + * Fully invalidating the TLB for every system call + + * Disadvantages for shared address space + + * Less address space for both kernel and user processes + + +Linux is using a split address space for 32 bit systems, although in +the past there were options for supporting a 4/4 split or dedicated +kernel address space (on those architectures that support it, +e.g. x86). Linux always uses split address space for 64 bit systems. + +An overview of the Linux address space is presented below: + +.. slide:: Linux address space for 32bit systems + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + :--no-separation: + + : : : : + | User space | Lowmem | Highmem | + | arbitrary mapping | linear mapping | arbitrary mapping | + | | | | + +----+----+--------------------+----+------+----+----+---------------+----+----+-----+----+----+ Virtual + |cEEE|cGRE|cEEE |cRED|cEEE |cAAA|cGRE| cAAA |cEEE|cGRE|cEEE |cRED|cEEE| memory + | | | | | | | | | | | | | | + +----+----+--------------------+----+------+----+----+---------------+----+----+-----+----+----+ + | | 3G | 3.896G | | 4G + | +-------+ | | | + | | | | | + |<----------------------------------+------+<-------------------------+ | + | | | + | |<-------------------------------------------+ + | | + v V + +----+----+---------------+--------------+----+------------------------------------------------+ Physical + |cAAA|cGRE| cAAA | cEEE |cRED| cEEE | memory + | | | | | | | + +----+----+---------------+--------------+----+------------------------------------------------+ + 896MB + + +Linear mappings +--------------- + +Linear mappings refer to a particular way of mapping virtual pages to +physical pages, where virtual pages V, V + 1, ..., V + n are mapped to +physical pages P, P + 1, ..., P + n. + +To understand the necessity of linear mappings, we should look at +common kernel operations that involve using both the virtual and +physical address of a page such as an I/O transfer: + +.. slide:: Virtual to physical address translations for I/O transfers + :inline-contents: True + :level: 2 + + * Use the virtual address of a kernel buffer in order to copy + data from user space + + * Walk the page tables to transform the kernel buffer virtual + address to a physical address + + * Use the physical address of the kernel buffer to start a DMA + transfer + + +However, if we use linear mappings and the kernel buffers are in the +linear mapping area, then: + +.. 
slide:: Linear mappings + :inline-contents: True + :level: 2 + + * Virtual to physical address space translation is reduced to one + operation (instead of walking the page tables) + + * Less memory is used to create the page tables + + * Less TLB entries are used for the kernel memory + + +Highmem +------- + +The "highmem" part of the virtual address space is used to create +arbitrary mappings (as opposed to linear mappings in lowmem). On 32bit +systems the highmem area is absolutely required in order to access +physical memory outside of lowmem. However, highmem is also used on +64bit systems but the use-case there is mainly to allow arbitrary +mappings in kernel space. + + +.. slide:: Highmem + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + +--------+ 8MB +-----------+ 4KB +-----------+ +-----------+ 4KB +------------+-----------+------------+ + | | | | | | | | | Persistent | Temporary | Fix-mapped | + | Lowmem | <-----> | VMAP area | <-----> | VMAP area | ... | VMAP area | <-----> | Kernel | Kernel | linear | + | | | | | | | | | Mappings | Mappings | addresses | + +--------+ +-----------+ +-----------+ +-----------+ +------------+-----------+------------+ + : : + | 128MB | + |<------------------------------------------------------------------------------------------------------------->| + | | + | | + VMALLOC_START 4GB + (896MB) + + +There are multiple types of mappings in the highmem area: + +* Multi-page permanent mappings (vmalloc, ioremap) + +* Temporary 1 page mappings (atomic_kmap) + +* Permanent 1 page mappings (kmap, fix-mapped linear addresses) + + +Multiple page mappings allows mapping of ranges of physical memory +into the highmem area. Each such mapping is guarded by a +non-accessible page to catch buffer overflow and underflow errors. + + +The APIs that maps multiple pages into highmem are: + +.. slide:: Multi-page permanent mappings + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + void* vmalloc(unsigned long size); + void vfree(void * addr); + + void *ioremap(unsigned long offset, unsigned size); + void iounmap(void * addr); + +:c:func:`vmalloc` is used to allocate non-contiguous system memory +pages as a contiguous segment in the kernel virtual address space. It +is useful when allocating large buffers because, due to fragmentation, +it is unlikely to find large free chunks of physically contiguous memory. + +:c:func:`ioremap` is used to map device memory or device registers +into the kernel address space. It maps a contiguous physical memory +range into highmem with page caching disabled. + +Fixed-mapped linear addresses +----------------------------- + +Fixed-mapped linear addresses are a special class of single page +mappings that are used for accessing registers of commonly used +peripherals such as the APIC or IO APIC. + +Typical I/O access for peripherals is to use a base (the kernel +virtual address space where the peripheral registers are mapped) + +offsets for various registers. + +In order to optimize access, the base is reserved at compile time +(e.g. 0xFFFFF000). Since the base is constant, the various register +accesses of the form `base + register offset` will also be constant +and thus the compiler will avoid generating an extra instruction. + +In summary, fixed-mapped linear addresses are: + +.. slide:: Fixed-mapped linear addresses + :inline-contents: True + :level: 2 + + * Reserved virtual addresses (constants) + + * Mapped to physical addresses during boot + + .. code-block:: c + + set_fixmap(idx, phys_addr) + set_fixmap_nocache(idx, phys_addr) + + +These addresses are architecture defined and, as an example, this is +the map for x86: + +.. slide:: Fixed-mapped linear addresses + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* + * Here we define all the compile-time 'special' virtual + * addresses. 
The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. + * for x86_32: We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * These 'compile-time allocated' memory buffers are + * fixed-size 4k pages (or larger if used with an increment + * higher than 1). Use set_fixmap(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ + + enum fixed_addresses { + #ifdef CONFIG_X86_32 + FIX_HOLE, + #else + #ifdef CONFIG_X86_VSYSCALL_EMULATION + VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT, + #endif + #endif + FIX_DBGP_BASE, + FIX_EARLYCON_MEM_BASE, + #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, + #endif + #ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ + #endif + #ifdef CONFIG_X86_IO_APIC + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, + #endif + #ifdef CONFIG_X86_32 + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + #ifdef CONFIG_PCI_MMCONFIG + FIX_PCIE_MCFG, + #endif + + +Notice how easy is to do the conversion between the virtual address +and the fixed address indexes: + +.. slide:: Conversion between virtual address fixed address indexes + :inline-contents: True + :level: 2 + + + .. code-block:: c + + #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) + #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) + + #ifndef __ASSEMBLY__ + /* + * 'index to address' translation. If anyone tries to use the idx + * directly without translation, we catch the bug with a NULL-deference + * kernel oops. 
Illegal ranges of incoming indices are caught too. + */ + static __always_inline unsigned long fix_to_virt(const unsigned int idx) + { + BUILD_BUG_ON(idx >= __end_of_fixed_addresses); + return __fix_to_virt(idx); + } + + static inline unsigned long virt_to_fix(const unsigned long vaddr) + { + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); + } + + + inline long fix_to_virt(const unsigned int idx) + { + if (idx >= __end_of_fixed_addresses) + __this_fixmap_does_not_exist(); + return (0xffffe000UL - (idx << PAGE_SHIFT)); + } + + +Temporary mappings +------------------ + +Temporary mappings can be used to map a single physical page, very +fast, in kernel space. They can be used in interrupt context but the +atomic kmap section, defined in between the :c:func:`kmap_atomic` and +:c:func:`kunmap_atomic` can not be preempted. That is why these are +called temporary mappings, as they can only be used momentarily. + + +.. slide:: Temporary mappings + :inline-contents: false + :level: 2 + + * :c:func:`kmap_atomic`, :c:func:`kunmap_atomic` + + * No context switch is permitted in atomic kmap section + + * Can be used in interrupt context + + * No locking required + + * Only invalidates one TLB entry + + +Temporary mappings are very fast because there is no locking or +searching required and also there is no full TLB invalidation, just +the particular virtual page will be TLB invalidated. + +Here are some code snippets that show how temporary mappings are +implemented: + +.. slide:: Temporary mappings implementation + :inline-contents: True + :level: 2 + + + .. 
code-block:: c + + #define kmap_atomic(page) kmap_atomic_prot(page, kmap_prot) + + void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) + { + unsigned long vaddr; + int idx, type; + + type = kmap_atomic_idx_push(); + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + BUG_ON(!pte_none(*(kmap_pte-idx))); + set_pte(kmap_pte-idx, mk_pte(page, prot)); + arch_flush_lazy_mmu_mode(); + + return (void *)vaddr; + } + EXPORT_SYMBOL(kmap_atomic_high_prot); + + static inline int kmap_atomic_idx_push(void) + { + int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; + + #ifdef CONFIG_DEBUG_HIGHMEM + WARN_ON_ONCE(in_irq() && !irqs_disabled()); + BUG_ON(idx >= KM_TYPE_NR); + #endif + return idx; + } + + +Notice that fix-mapped linear addresses and a stack-like approach are +used: each CPU has KM_TYPE_NR reserved entries which are used on a +first come, first served basis. This allows using multiple temporary +mappings at once, for example one in process context, one in an +interrupt handler, and a few more in tasklets or softirqs. + +.. slide:: Implementation of temporary mappings + :inline-contents: false + :level: 2 + + * Use the fixed-mapped linear addresses + + * Every CPU has KM_TYPE_NR reserved entries to be used for + temporary mappings + + * Stack like selection: every user picks the current entry and + increments the "stack" counter + +Permanent mappings +------------------ + +Permanent mappings allow users to hold on to a mapping for long +(undefined) periods of time which means that context switches are +allowed after a mapping and before releasing it. + +This flexibility comes with a price though. A search operation is +performed to find a free entry and they can not be used in interrupt +context - the operation that tries to find a free virtual address page +may block. There is a limited number of permanent mappings available +(typically one page table is reserved for permanent mappings) + +.. 
slide:: Permanent mappings + :inline-contents: false + :level: 2 + + * :c:func:`kmap`, :c:func:`kunmap` + + * Context switches are allowed + + * Only available in process context + + * One page table is reserved for permanent mappings + + * Page counter + + * 0 - page is not mapped, free and ready to use + + * 1 - page is not mapped, may be present in TLB needs flushing before using + + * N - page is mapped N-1 times + + diff --git a/Documentation/teaching/lectures/arch.rst b/Documentation/teaching/lectures/arch.rst new file mode 100644 index 00000000000000..ef97e871cd8559 --- /dev/null +++ b/Documentation/teaching/lectures/arch.rst @@ -0,0 +1,217 @@ +================== +Architecture Layer +================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. slide:: Introduction + :inline-contents: True + :level: 2 + + * Overview of the arch layer + + * Overview of the boot process + + +Overview of the arch layer +========================== + +.. slide:: Overview of the arch layer + :level: 2 + :inline-contents: True + + .. ditaa:: + :height: 100% + + +---------------+ +--------------+ +---------------+ + | Application 1 | | Application2 | ... 
| Application n | + +---------------+ +--------------+ +---------------+ + | | | + v v v + +--------------------------------+------------------------+ + | Kernel core & subsystems | Generic Drivers | + +--------------------------------+------------------------+ + | Generic Architecture Code | + +---------------------------------------------------------+ + | Architecture Specific Code | + | | + | +-----------+ +--------+ +---------+ +--------+ | + | | Bootstrap | | Memory | | Threads | | Timers | | + | +-----------+ +--------+ +---------+ +--------+ | + | +------+ +----------+ +------------------+ | + | | IRQs | | Syscalls | | Platform Drivers | | + | +------+ +----------+ +------------------+ | + | +------------------+ +---------+ +---------+ | + | | Platform Drivers | | machine | ... | machine | | + | +------------------+ +---------+ +---------+ | + +---------------------------------------------------------+ + | | | + v v v + +--------------------------------------------------------+ + | Hardware | + +--------------------------------------------------------+ + + +Boot strap +---------- + +.. slide:: Bootstrap + :level: 2 + :inline-contents: True + + * The first kernel code that runs + + * Typically runs with the MMU disabled + + * Move / Relocate kernel code + + +Boot strap +---------- + +.. slide:: Bootstrap + :level: 2 + :inline-contents: True + + * The first kernel code that runs + + * Typically runs with the MMU disabled + + * Copy bootloader arguments and determine kernel run location + + * Move / relocate kernel code to final location + + * Initial MMU setup - map the kernel + + + +Memory setup +------------ + +.. 
slide:: Memory Setup + :level: 2 + :inline-contents: True + + * Determine available memory and setup the boot memory allocator + + * Manages memory regions before the page allocator is setup + + * Bootmem - used a bitmap to track free blocks + + * Memblock - deprecates bootmem and adds support for memory ranges + + * Supports both physical and virtual addresses + + * support NUMA architectures + + +MMU management +-------------- + +.. slide:: MMU management + :level: 2 + :inline-contents: True + + * Implements the generic page table manipulation APIs: types, + accessors, flags + + * Implement TLB management APIs: flush, invalidate + + +Thread Management +----------------- + +.. slide:: Thread Management + :level: 2 + :inline-contents: True + + * Defines the thread type (struct thread_info) and implements + functions for allocating threads (if needed) + + * Implement :c:func:`copy_thread` and :c:func:`switch_context` + + +Time Management +---------------- + +.. slide:: Timer Management + :level: 2 + :inline-contents: True + + * Setup the timer tick and provide a time source + + * Mostly transitioned to platform drivers + + * clock_event_device - for scheduling timers + + * clocksource - for reading the time + + +IRQs and exception management +----------------------------- + +.. slide:: IRQs and exception management + :level: 2 + :inline-contents: True + + * Define interrupt and exception handlers / entry points + + * Setup priorities + + * Platform drivers for interrupt controllers + + +System calls +------------ + +.. slide:: System calls + :level: 2 + :inline-contents: True + + * Define system call entry point(s) + + * Implement user-space access primitives (e.g. copy_to_user) + + +Platform Drivers +---------------- + +.. slide:: Platform Drivers + :level: 2 + :inline-contents: True + + * Platform and architecture specific drivers + + * Bindings to platform device enumeration methods (e.g. 
device tree + or ACPI) + +Machine specific code +--------------------- + +.. slide:: Machine specific code + :level: 2 + :inline-contents: True + + * Some architectures use a "machine" / "platform" abstraction + + * Typical for architectures used in embedded systems with a lot of + variety (e.g. ARM, PowerPC) + + +Overview of the boot process +============================ + + +.. slide:: Boot flow inspection + :level: 2 + :inline-contents: True + + + .. asciicast:: ../res/boot.cast diff --git a/Documentation/teaching/lectures/debugging.rst b/Documentation/teaching/lectures/debugging.rst new file mode 100644 index 00000000000000..dc384855ca36ef --- /dev/null +++ b/Documentation/teaching/lectures/debugging.rst @@ -0,0 +1,942 @@ +========= +Debugging +========= + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +One essential part of Linux kernel development is debugging. In user space we had +the support of the kernel so we could easily stop processes and use gdb to inspect +their behavior. In the kernel, in order to use gdb we need to use a hypervisor like +QEMU or JTAG based hardware interfaces which are not always available. The Linux +kernel provides a set of tools and debug options useful for investigating abnormal +behavior. + +In this lecture we will learn about: + +.. slide:: Debugging + :inline-contents: True + :level: 2 + + * decoding an oops/panic + * list debugging + * memory debugging + * locking debugging + * profiling + +Decoding an oops/panic +====================== + +An oops is an inconsistent state that the kernel detects inside itself. +Upon detecting an oops the Linux kernel kills the offending process, +prints information that can help debug the problem and continues execution +but with limited reliability. + +Let's consider the following Linux kernel module: + +.. slide:: Oops module + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + static noinline void do_oops(void) + { + *(int*)0x42 = 'a'; + } + + static int so2_oops_init(void) + { + pr_info("oops_init\n"); + do_oops(); + + return 0; + } + + static void so2_oops_exit(void) + { + pr_info("oops exit\n"); + } + + module_init(so2_oops_init); + module_exit(so2_oops_exit); + +Notice that ''do_oops'' function tries to write at an invalid memory address. Because the kernel +cannot find a suitable physical page were to write, it kills the insmod task in the context of +which ''do_oops'' runs. Then it prints the following oops message: + + .. code-block:: bash + + root@qemux86:~/skels/debugging/oops# insmod oops.ko + BUG: unable to handle kernel NULL pointer dereference at 00000042 + IP: do_oops+0x8/0x10 [oops] + *pde = 00000000 + Oops: 0002 [#1] SMP + Modules linked in: oops(O+) + CPU: 0 PID: 234 Comm: insmod Tainted: G O 4.15.0+ #3 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + EIP: do_oops+0x8/0x10 [oops] + EFLAGS: 00000292 CPU: 0 + EAX: 00000061 EBX: 00000000 ECX: c7ed3584 EDX: c7ece8dc + ESI: c716c908 EDI: c8816010 EBP: c7257df0 ESP: c7257df0 + DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 + CR0: 80050033 CR2: 00000042 CR3: 0785f000 CR4: 00000690 + Call Trace: + so2_oops_init+0x17/0x20 [oops] + do_one_initcall+0x37/0x170 + ? cache_alloc_debugcheck_after.isra.19+0x15f/0x2f0 + ? __might_sleep+0x32/0x90 + ? trace_hardirqs_on_caller+0x11c/0x1a0 + ? do_init_module+0x17/0x1c2 + ? kmem_cache_alloc+0xa4/0x1e0 + ? 
do_init_module+0x17/0x1c2 + do_init_module+0x46/0x1c2 + load_module+0x1f45/0x2380 + SyS_init_module+0xe5/0x100 + do_int80_syscall_32+0x61/0x190 + entry_INT80_32+0x2f/0x2f + EIP: 0x44902cc2 + EFLAGS: 00000206 CPU: 0 + EAX: ffffffda EBX: 08afb050 ECX: 0000eef4 EDX: 08afb008 + ESI: 00000000 EDI: bf914dbc EBP: 00000000 ESP: bf914c1c + DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b + Code: 42 00 00 00 5d c3 90 55 89 e5 83 ec 04 c7 04 24 24 70 81 c8 e8 + EIP: do_oops+0x8/0x10 [oops] SS:ESP: 0068:c7257df0 + CR2: 0000000000000042 + ---[ end trace 011848be72f8bb42 ]--- + Killed + +An oops contains information about the IP which caused the fault, the register status, and the process and +CPU on which the fault happened, as shown below: + +.. slide:: Oops information + :inline-contents: True + :level: 2 + + .. code-block:: bash + + root@qemux86:~/skels/debugging/oops# insmod oops.ko + BUG: unable to handle kernel NULL pointer dereference at 00000042 + IP: do_oops+0x8/0x10 [oops] + *pde = 00000000 + Oops: 0002 [#1] SMP + Modules linked in: oops(O+) + CPU: 0 PID: 234 Comm: insmod Tainted: G O 4.15.0+ #3 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + EIP: do_oops+0x8/0x10 [oops] + CR0: 80050033 CR2: 00000042 CR3: 0785f000 CR4: 00000690 + EIP: 0x44902cc2 + EFLAGS: 00000206 CPU: 0 + EAX: ffffffda EBX: 08afb050 ECX: 0000eef4 EDX: 08afb008 + ESI: 00000000 EDI: bf914dbc EBP: 00000000 ESP: bf914c1c + DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b + Code: 42 00 00 00 5d c3 90 55 89 e5 83 ec 04 c7 04 24 24 70 81 c8 e8 + Killed + +Another important thing that an oops can provide is the stack trace of functions called before +the fault happened: + +.. slide:: Oops stacktrace + :inline-contents: True + :level: 2 + + + .. code-block:: bash + + root@qemux86:~/skels/debugging/oops# insmod oops.ko + BUG: unable to handle kernel NULL pointer dereference at 00000042 + Call Trace: + so2_oops_init+0x17/0x20 [oops] + do_one_initcall+0x37/0x170 + ? 
cache_alloc_debugcheck_after.isra.19+0x15f/0x2f0 + ? __might_sleep+0x32/0x90 + ? trace_hardirqs_on_caller+0x11c/0x1a0 + ? do_init_module+0x17/0x1c2 + ? kmem_cache_alloc+0xa4/0x1e0 + ? do_init_module+0x17/0x1c2 + do_init_module+0x46/0x1c2 + load_module+0x1f45/0x2380 + SyS_init_module+0xe5/0x100 + do_int80_syscall_32+0x61/0x190 + entry_INT80_32+0x2f/0x2f + Killed + +Decoding an oops +---------------- + +.. slide:: Debugging + :inline-contents: True + :level: 2 + + * CONFIG_DEBUG_INFO + * addr2line + * gdb + * objdump -dSr + +addr2line +--------- + +*addr2line* translates addresses into file names and line numbers. Given +an address in an executable it uses the debugging information to figure out +which file name and line number are associated with it. + +Modules are loaded at dynamic addresses but are compiled starting with 0 as +a base address. So, in order to find the line number for a given dynamic address +we need to know the module's load address. + +.. slide:: addr2line + :inline-contents: True + :level: 2 + + .. code-block:: bash + + $ addr2line -e oops.o 0x08 + $ skels/debugging/oops/oops.c:5 + $ # 0x08 is the offset of the offending instruction inside the oops.ko module + +objdump +------- + +Similarly, we can determine the offending line using objdump: + +.. slide:: objdump + :inline-contents: True + :level: 2 + + .. code-block:: bash + + $ cat /proc/modules + oops 20480 1 - Loading 0xc8816000 (O+) + + $ objdump -dS --adjust-vma=0xc8816000 oops.ko + c8816000: b8 61 00 00 00 mov $0x61,%eax + + static noinline void do_oops(void) + { + c8816005: 55 push %ebp + c8816006: 89 e5 mov %esp,%ebp + *(int*)0x42 = 'a'; + c8816008: a3 42 00 00 00 mov %eax,0x42 + +gdb +--- + +.. slide:: gdb + :inline-contents: True + :level: 2 + + .. code-block:: bash + + $ gdb ./vmlinux + + (gdb) list *(do_panic+0x8) + 0xc1244138 is in do_panic (lib/test_panic.c:8). 
+ 3 + 4 static struct timer_list panic_timer; + 5 + 6 static void do_panic(struct timer_list *unused) + 7 { + 8 *(int*)0x42 = 'a'; + 9 } + 10 + 11 static int so2_panic_init(void) + +Kernel panic +------------ + +A kernel panic is a special type of oops where the kernel cannot continue execution. For example +if the function do_oops from above was called in the interrupt context, the kernel wouldn't know how to kill +and it will decide that it is better to crash the kernel and stop execution. + +Here is a sample code that will generate a kernel panic: + +.. slide:: Kernel panic + :inline-contents: True + :level: 2 + + .. code-block:: c + + static struct timer_list panic_timer; + + static void do_panic(struct timer_list *unused) + { + *(int*)0x42 = 'a'; + } + + static int so2_panic_init(void) + { + pr_info("panic_init\n"); + + timer_setup(&panic_timer, do_panic, 0); + mod_timer(&panic_timer, jiffies + 2 * HZ); + + return 0; + } + +Loading the module will generate the following kernel panic message: + +.. code-block:: bash + + root@qemux86:~/skels/debugging/panic# insmod panic.ko + panic: loading out-of-tree module taints kernel. + panic_init + root@qemux86:~/skels/debugging/panic# BUG: unable to handle kernel NULL pointer dereference at 00000042 + IP: do_panic+0x8/0x10 [panic] + *pde = 00000000 + Oops: 0002 [#1] SMP + Modules linked in: panic(O) + CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 4.15.0+ #19 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + EIP: do_panic+0x8/0x10 [panic] + EFLAGS: 00010246 CPU: 0 + EAX: 00000061 EBX: 00000101 ECX: 000002d8 EDX: 00000000 + ESI: c8817000 EDI: c8819200 EBP: c780ff34 ESP: c780ff34 + DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 + CR0: 80050033 CR2: 00000042 CR3: 0716b000 CR4: 00000690 + Call Trace: + + call_timer_fn+0x63/0xf0 + ? process_timeout+0x10/0x10 + run_timer_softirq+0x14f/0x170 + ? 0xc8817000 + ? trace_hardirqs_on_caller+0x9b/0x1a0 + __do_softirq+0xde/0x1f2 + ? 
__irqentry_text_end+0x6/0x6 + do_softirq_own_stack+0x57/0x70 + + irq_exit+0x7d/0x90 + smp_apic_timer_interrupt+0x4f/0x90 + ? trace_hardirqs_off_thunk+0xc/0x1d + apic_timer_interrupt+0x3a/0x40 + EIP: default_idle+0xa/0x10 + EFLAGS: 00000246 CPU: 0 + EAX: c15c97c0 EBX: 00000000 ECX: 00000000 EDX: 00000001 + ESI: 00000000 EDI: 00000000 EBP: c15c3f48 ESP: c15c3f48 + DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 + arch_cpu_idle+0x9/0x10 + default_idle_call+0x19/0x30 + do_idle+0x105/0x180 + cpu_startup_entry+0x25/0x30 + rest_init+0x1e3/0x1f0 + start_kernel+0x305/0x30a + i386_start_kernel+0x95/0x99 + startup_32_smp+0x15f/0x164 + Code: 42 00 00 00 5d c3 90 55 89 e5 83 ec 08 c7 04 24 24 80 81 c8 e8 + EIP: do_panic+0x8/0x10 [panic] SS:ESP: 0068:c780ff34 + CR2: 0000000000000042 + ---[ end trace 77f49f83f2e42f91 ]--- + Kernel panic - not syncing: Fatal exception in interrupt + Kernel Offset: disabled + ---[ end Kernel panic - not syncing: Fatal exception in interrupt + + +List debugging +============== + +In order to catch access to uninitialized elements the kernel uses poison +magic values. + +.. slide:: List debugging + :inline-contents: True + :level: 2 + + .. code-block:: bash + + static inline void list_del(struct list_head *entry) + { + __list_del(entry->prev, entry->next); + entry->next = (struct list_head*)LIST_POISON1; + entry->prev = (struct list_head*)LIST_POISON2; + } + + BUG: unable to handle kernel NULL pointer dereference at 00000100 + IP: crush+0x80/0xb0 [list] + +Memory debugging +================ + +There are several tools for memory debugging: + +.. slide:: Memory debugging + :inline-contents: True + :level: 2 + + * SLAB/SLUB debugging + * KASAN + * kmemcheck + * DEBUG_PAGEALLOC + +Slab debugging +--------------- + +Slab debugging uses a memory poison technique to detect several types of memory +bugs in the SLAB/SLUB allocators. + +The allocated buffers are guarded with memory that has been filled in with +special markers. 
Any adjacent writes to the buffer will be detected at a later +time when other memory management operations on that buffer are performed +(e.g. when the buffer is freed). + +Upon allocation of the buffer, the buffer is also filled in with a special +value to potentially detect buffer access before initialization (e.g. if the +buffer holds pointers). The value is selected in such a way that it is unlikely +to be a valid address and as such to trigger kernel bugs at the access time. + +A similar technique is used when freeing the buffer: the buffer is filled with +another special value that will cause kernel bugs if pointers are accessed after +the memory is freed. In this case, the allocator also checks the next time the +buffer is allocated that the buffer was not modified. + +The diagram below shows a summary of the way SLAB/SLUB poisoning works: + + +.. slide:: Slab debugging + :inline-contents: True + :level: 2 + + * CONFIG_DEBUG_SLAB + * poisoned based memory debuggers + + .. ditaa:: + +--------------+-----------------------+--------------+ + | cF88 | c8F8 | cF88 | + | Buffer | Allocated buffer | Buffer | + | Underflow | 0x5a5a5a5a | Overflow | + | Poison | 0x5a5a5a5a | Poison | + | | 0x5a5a5a5a | | + +--------------+-----------------------+--------------+ + + +--------------+-----------------------+--------------+ + | cF88 | c888 | cF88 | + | Buffer | Freed buffer | Buffer | + | Underflow | 0x6b6b6b6b | Overflow | + | Poison | 0x6b6b6b6b | Poison | + | | 0x6b6b6b6b | | + +--------------+-----------------------+--------------+ + + +Example of a use-before-initialize bug: + +.. slide:: Use before initialize bugs + :inline-contents: True + :level: 2 + + :: + + BUG: unable to handle kernel paging request at 5a5a5a5a + IP: [] __list_del_entry+0x37/0x71 + … + Call Trace: + [] list_del+0xb/0x1b + [] use_before_init+0x31/0x38 [crusher] + [] crush_it+0x38/0xa9 [crusher] + [] init_module+0x8/0xa [crusher] + [] do_one_initcall+0x72/0x119 + [] ? 
crush_it+0xa9/0xa9 [crusher] + [] sys_init_module+0xc8d/0xe77 + [] syscall_call+0x7/0xb + + .. code-block:: c + + noinline void use_before_init(void) + { + struct list_m *m = kmalloc(sizeof(*m), GFP_KERNEL); + + printk("%s\n", __func__); + list_del(&m->lh); + } + +Example of an use after free bug: + +.. slide:: Use after free bug + :inline-contents: True + :level: 2 + + :: + + BUG: unable to handle kernel paging request at 6b6b6b6b + IP: [] __list_del_entry+0x37/0x71 + … + Call Trace: + [] list_del+0xb/0x1b + [] use_after_free+0x38/0x3f [crusher] + [] crush_it+0x52/0xa9 [crusher] + [] init_module+0x8/0xa [crusher] + [] do_one_initcall+0x72/0x119 + [] ? crush_it+0xa9/0xa9 [crusher] + [] sys_init_module+0xc8d/0xe77 + [] syscall_call+0x7/0xb + + .. code-block:: c + + noinline void use_after_free(void) + { + struct list_m *m = kmalloc(sizeof(*m), GFP_KERNEL); + + printk("%s\n", __func__); + kfree(m); + list_del(&m->lh); + } + +Another example of an use after free bug is shown below. Note that this time the +bug is detected at the next allocation. + +.. slide:: Use after free bug + :inline-contents: True + :level: 2 + + :: + + # insmod /system/lib/modules/crusher.ko test=use_before_init + Slab corruption: size-4096 start=ed612000, len=4096 + 000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 6b 6b + + .. code-block:: c + + noinline void use_after_free2(void) + { + char *b = kmalloc(3000, GFP_KERNEL); + kfree(b); + memset(b, 0, 30); + b = kmalloc(3000, GFP_KERNEL); + kfree(b); + } + +Finally this is an example of a buffer overflow bug: + +.. 
slide:: Buffer overflow bugs + :inline-contents: True + :level: 2 + + :: + + slab error in verify_redzone_free(): cache `dummy': memory outside object was overwritten + Pid: 1282, comm: insmod Not tainted 3.0.16-mid10-00007-ga4a6b62-dirty #70 + Call Trace: + [] __slab_error+0x17/0x1c + [] __cache_free+0x12c/0x317 + [] kmem_cache_free+0x2b/0xaf + [] buffer_overflow+0x4c/0x57 [crusher] + [] crush_it+0x6c/0xa9 [crusher] + [] init_module+0x8/0xd [crusher] + [] do_one_initcall+0x72/0x119 + [] sys_init_module+0xc8d/0xe77 + [] syscall_call+0x7/0xb + eb002bf8: redzone 1:0xd84156c5635688c0, redzone 2:0x0 + + .. code-block:: c + + noinline void buffer_overflow(void) + { + struct kmem_cache *km = kmem_cache_create("dummy", 3000, 0, 0, NULL); + char *b = kmem_cache_alloc(km, GFP_KERNEL); + + printk("%s\n", __func__); + memset(b, 0, 3016); + kmem_cache_free(km, b); + } + + +DEBUG_PAGEALLOC +--------------- + +.. slide:: DEBUG_PAGEALLOC + :inline-contents: True + :level: 2 + + * Memory debugger that works at a page level + * Detects invalid accesses either by: + + * Filling pages with poison byte patterns and checking the pattern at + reallocation + * Unmapping the deallocated pages from kernel space (just a few + architectures) + + +KASan +----- + +KASan is a dynamic memory error detector designed to find use-after-free +and out-of-bounds bugs. + +The main idea of KASAN is to use shadow memory to record whether each byte +of memory is safe to access or not, and use compiler instrumentation to +check the shadow memory on each memory access. + +Address sanitizer uses 1 byte of shadow memory to track 8 bytes of kernel +address space. It uses 0-7 to encode the number of consecutive bytes at +the beginning of the eight-byte region that are valid. + +See `The Kernel Address Sanitizer (KASAN)` for more information and have a look +at lib/test_kasan.c for an example of problems that KASan can detect. + +.. 
slide:: KASan + :inline-contents: True + :level: 2 + + * dynamic memory error detector + * finds use-after-free or out-of-bounds bugs + * uses shadow memory to track memory operations + * lib/test_kasan.c + + +KASan vs DEBUG_PAGEALLOC +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. slide:: KASan vs DEBUG_PAGEALLOC + :inline-contents: True + :level: 2 + + KASan is slower than DEBUG_PAGEALLOC, but KASan works on sub-page granularity + level, so it is able to find more bugs. + + +KASan vs SLUB_DEBUG +~~~~~~~~~~~~~~~~~~~ + +.. slide:: KASan vs SLUB_DEBUG + :inline-contents: True + :level: 2 + + * SLUB_DEBUG has lower overhead than KASan. + * SLUB_DEBUG in most cases is not able to detect bad reads; KASan is able to + detect both bad reads and writes. + * In some cases (e.g. redzone overwritten) SLUB_DEBUG detects bugs only on + allocation/freeing of the object. KASan catches bugs right before they happen, + so we always know the exact place of the first bad read/write. + + +Kmemleak +-------- + +Kmemleak provides a way of detecting kernel memory leaks in a way similar to a +tracing garbage collector. Since tracing pointers is not possible in C, kmemleak +scans the kernel stacks as well as dynamically and statically allocated kernel memory for +pointers to allocated buffers. A buffer for which there is no pointer is +considered as leaked. The basic steps to use kmemleak are presented below; for +more information see `Kernel Memory Leak Detector` + + +.. slide:: Kmemleak + :inline-contents: True + :level: 2 + + * enable kernel config: `CONFIG_DEBUG_KMEMLEAK` + * setup: `mount -t debugfs nodev /sys/kernel/debug` + * trigger a memory scan: `echo scan > /sys/kernel/debug/kmemleak` + * show memory leaks: `cat /sys/kernel/debug/kmemleak` + * clear all possible leaks: `echo clear > /sys/kernel/debug/kmemleak` + +As an example, let's look at the following simple module: + +.. slide:: Kmemleak example + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + static int leak_init(void) + { + pr_info("%s\n", __func__); + + (void)kmalloc(16, GFP_KERNEL); + + return 0; + } + + MODULE_LICENSE("GPL v2"); + module_init(leak_init); + +Loading the module and triggering a kmemleak scan will issue the +following report: + +.. slide:: Kmemleak report + :inline-contents: True + :level: 2 + + :: + + root@qemux86:~# insmod skels/debugging/leak/leak.ko + leak: loading out-of-tree module taints kernel. + leak_init + root@qemux86:~# echo scan > /sys/kernel/debug/kmemleak + root@qemux86:~# echo scan > /sys/kernel/debug/kmemleak + kmemleak: 1 new suspected memory leaks (see /sys/kernel/debug/kmemleak) + root@qemux86:~# cat /sys/kernel/debug/kmemleak + unreferenced object 0xd7871500 (size 32): + comm "insmod", pid 237, jiffies 4294902108 (age 24.628s) + hex dump (first 32 bytes): + 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ + 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a a5 ZZZZZZZZZZZZZZZ. + backtrace: + [<(ptrval)>] kmem_cache_alloc_trace+0x163/0x310 + [<(ptrval)>] leak_init+0x2f/0x1000 [leak] + [<(ptrval)>] do_one_initcall+0x57/0x2e0 + [<(ptrval)>] do_init_module+0x4b/0x1be + [<(ptrval)>] load_module+0x201a/0x2590 + [<(ptrval)>] sys_init_module+0xfd/0x120 + [<(ptrval)>] do_int80_syscall_32+0x6a/0x1a0 + + +.. note:: Notice that we did not have to unload the module to detect the memory + leak since kmemleak detects that the allocated buffer is not + reachable anymore. + + +Lockdep checker +=============== + +.. slide:: Lockdep checker + :inline-contents: True + :level: 2 + + * CONFIG_DEBUG_LOCKDEP + * Detects lock inversion, circular dependencies, incorrect usage of locks + (including interrupt context) + * Maintains dependencies between classes of locks, not individual locks + * Each scenario is only checked once and hashed + + +Let's take for example the following kernel module that runs two kernel threads: + +.. slide:: AB BA Deadlock Example + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + static noinline int thread_a(void *unused) + { + mutex_lock(&a); pr_info("%s acquired A\n", __func__); + mutex_lock(&b); pr_info("%s acquired B\n", __func__); + + mutex_unlock(&b); + mutex_unlock(&a); + + return 0; + } + + .. code-block:: c + + static noinline int thread_b(void *unused) + { + mutex_lock(&b); pr_info("%s acquired B\n", __func__); + mutex_lock(&a); pr_info("%s acquired A\n", __func__); + + mutex_unlock(&a); + mutex_unlock(&b); + + return 0; + } + + +Loading this module with the lockdep checker active will produce the following +kernel log: + +.. slide:: AB BA Deadlock Report + :inline-contents: True + :level: 2 + + :: + + thread_a acquired A + thread_a acquired B + thread_b acquired B + + ====================================================== + WARNING: possible circular locking dependency detected + 4.19.0+ #4 Tainted: G O + ------------------------------------------------------ + thread_b/238 is trying to acquire lock: + (ptrval) (a){+.+.}, at: thread_b+0x48/0x90 [locking] + + but task is already holding lock: + (ptrval) (b){+.+.}, at: thread_b+0x27/0x90 [locking] + + which lock already depends on the new lock. + + +As you can see, although the deadlock condition did not trigger (because thread +A completed execution before thread B started execution) the lockdep +checker identified a potential deadlock scenario. + +The lockdep checker will provide even more information to help determine what caused +the deadlock, like the dependency chain: + +.. 
slide:: AB BA Deadlock Report (dependency chain) + :inline-contents: True + :level: 2 + + :: + + the existing dependency chain (in reverse order) is: + + -> #1 (b){+.+.}: + __mutex_lock+0x60/0x830 + mutex_lock_nested+0x20/0x30 + thread_a+0x48/0x90 [locking] + kthread+0xeb/0x100 + ret_from_fork+0x2e/0x38 + + -> #0 (a){+.+.}: + lock_acquire+0x93/0x190 + __mutex_lock+0x60/0x830 + mutex_lock_nested+0x20/0x30 + thread_b+0x48/0x90 [locking] + kthread+0xeb/0x100 + ret_from_fork+0x2e/0x38 + +and even an unsafe locking scenario: + +.. slide:: AB BA Deadlock Report (unsafe locking scenario) + :inline-contents: True + :level: 2 + + :: + + other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(b); + lock(a); + lock(b); + lock(a); + + *** DEADLOCK *** + + +Another example of an unsafe locking issue that the lockdep checker detects +is unsafe locking from interrupt context. Let's consider the following +kernel module: + +.. slide:: IRQ Deadlock Example + :inline-contents: True + :level: 2 + + .. code-block:: c + + static DEFINE_SPINLOCK(lock); + + static void timerfn(struct timer_list *unused) + { + pr_info("%s acquiring lock\n", __func__); + spin_lock(&lock); pr_info("%s acquired lock\n", __func__); + spin_unlock(&lock); pr_info("%s released lock\n", __func__); + } + + static DEFINE_TIMER(timer, timerfn); + + int init_module(void) + { + mod_timer(&timer, jiffies); + + pr_info("%s acquiring lock\n", __func__); + spin_lock(&lock); pr_info("%s acquired lock\n", __func__); + spin_unlock(&lock); pr_info("%s released lock\n", __func__); + return 0; + } + + +As in the previous case, loading the module will trigger a lockdep +warning: + +.. 
slide:: IRQ Deadlock Report + :inline-contents: True + :level: 2 + + :: + + init_module acquiring lock + init_module acquired lock + init_module released lock + timerfn acquiring lock + + ================================ + WARNING: inconsistent lock state + 4.19.0+ #4 Tainted: G O + -------------------------------- + inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. + ksoftirqd/0/9 [HC0[0]:SC1[1]:HE1:SE0] takes: + (ptrval) (lock#4){+.?.}, at: timerfn+0x25/0x60 [locking2] + {SOFTIRQ-ON-W} state was registered at: + lock_acquire+0x93/0x190 + _raw_spin_lock+0x39/0x50 + init_module+0x35/0x70 [locking2] + do_one_initcall+0x57/0x2e0 + do_init_module+0x4b/0x1be + load_module+0x201a/0x2590 + sys_init_module+0xfd/0x120 + do_int80_syscall_32+0x6a/0x1a0 + restore_all+0x0/0x8d + + +The warning will also provide additional information and a potential unsafe +locking scenario: + +.. slide:: IRQ Deadlock Report + :inline-contents: True + :level: 2 + + :: + + Possible unsafe locking scenario: + + CPU0 + ---- + lock(lock#4); + + lock(lock#4); + + *** DEADLOCK *** + + 1 lock held by ksoftirqd/0/9: + #0: (ptrval) (/home/tavi/src/linux/tools/labs/skels/./debugging/locking2/locking2.c:13){+.-.}, at: call_timer_f0 + stack backtrace: + CPU: 0 PID: 9 Comm: ksoftirqd/0 Tainted: G O 4.19.0+ #4 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 + Call Trace: + dump_stack+0x66/0x96 + print_usage_bug.part.26+0x1ee/0x200 + mark_lock+0x5ea/0x640 + __lock_acquire+0x4b4/0x17a0 + lock_acquire+0x93/0x190 + _raw_spin_lock+0x39/0x50 + timerfn+0x25/0x60 [locking2] + + +perf +==== + +.. slide:: perf + :inline-contents: True + :level: 2 + + * performance counters, tracepoints, kprobes, uprobes + * hardware events: CPU cycles, TLB misses, cache misses + * software events: page faults , context switches + * collects backtraces (user + kernel) + +Other tools +=========== + +.. 
slide:: Other tools + :inline-contents: True + :level: 2 + + * ftrace + * kprobes + * sparse + * coccinelle + * checkpatch.pl + * printk + * dump_stack() diff --git a/Documentation/teaching/lectures/fs.rst b/Documentation/teaching/lectures/fs.rst new file mode 100644 index 00000000000000..20b4e6d68a02ed --- /dev/null +++ b/Documentation/teaching/lectures/fs.rst @@ -0,0 +1,775 @@ +===================== +Filesystem Management +===================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. slide:: Filesystem Management + :inline-contents: True + :level: 2 + + * Filesystem abstractions + + * Filesystem operations + + * Linux VFS + + * Overview of Linux I/O Management + + +Filesystem Abstractions +======================= + +A filesystem is a way to organize files and directories on storage +devices such as hard disks, SSDs or flash memory. There are many types +of filesystems (e.g. FAT, ext4, btrfs, ntfs) and on one running system +we can have multiple instances of the same filesystem type in use. + +While filesystems use different data structures to organize the +files, directories, user data and meta (internal) data on storage +devices there are a few common abstractions that are used in almost +all filesystems: + +.. slide:: Filesystem Abstractions + :inline-contents: True + :level: 2 + + * superblock + + * file + + * inode + + * dentry + + +Some of these abstractions are present both on disk and in memory +while some are only present in memory. + +The *superblock* abstraction contains information about the filesystem +instance such as the block size, the root inode, filesystem size. It +is present both on storage and in memory (for caching purposes). + +The *file* abstraction contains information about an opened file such +as the current file pointer. It only exists in memory. + +The *inode* identifies a file on disk. 
It exists both on storage +and in memory (for caching purposes). An inode identifies a file in a +unique way and has various properties such as the file size, access +rights, file type, etc. + +.. note:: The file name is not a property of the file. + +The *dentry* associates a name with an inode. It exists both on +storage and in memory (for caching purposes). + +The following diagram shows the relationship between the various filesystem +abstractions as they are used in memory: + +.. slide:: Filesystem Abstractions - in memory + :inline-contents: True + :level: 2 + + .. ditaa:: + :--no-separation: + + file + descriptor + table + +------------+ +--------+ +--------+ +---------+ + | |------+--->| FILE |------->| dentry |------->| inode | + +------------+ | +--------+ +--------+ ^ +---------+ + +-> | |------+ dup | | type | + | +------------+ hard link | | perm | + | | ... | | | .... | + | +------------+ +--------+ +--------+ | +---------+ + | | |---------->| FILE |------->| dentry |---+ | + | +------------+ +--------+ +--------+ | + fd | + | + +------+ <-------------------+ + | data | + +------+ + +------+ +------+ + | data | | data | + +------+ +------+ + +------+ + | data | + +------+ + +Note that not all of the one-to-many relationships between the various +abstractions are depicted. + +Multiple file descriptors can point to the same *file* because we can +use the :c:func:`dup` system call to duplicate a file descriptor. + +Multiple *file* abstractions can point to the same *dentry* if we open +the same path multiple times. + +Multiple *dentries* can point to the same *inode* when hard links are +used. + +The following diagram shows the relationship of the filesystem +abstractions on storage: + +.. slide:: Filesystem Abstractions - on storage + :inline-contents: True + :level: 2 + + .. ditaa:: + :--no-separation: + + + +--------+ +-------+ data +--------+ + | dentry |-------------->| inode |--------+ | dentry | + +--------+ +-------+ | +--------+ + | ...... | | ..... 
| | | ...... | + +--------+ +-------+ dir | +--------+ + | dentry | | inode |--------|--+ | dentry | + +--------+ +-------+ | | +--------+ + ^ | | ^ + | | | | + | | | +--------+ + | V v | + +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ + blocks | | | | | | | | | | | | | | | | | | + +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ + | | + | +------------+ | ++++++++++++ + +--->| superblock | +--->|||||||||||| block management + +------------+ ++++++++++++ + + +The diagram shows that the *superblock* is typically stored at the +beginning of the filesystem and that various blocks are used with +different purposes: some to store dentries, some to store inodes and +some to store user data blocks. There are also blocks used to manage +the available free blocks (e.g. bitmaps for the simple filesystems). + +The next diagram shows a very simple filesystem where blocks are +grouped together by function: + +* the superblock contains information about the block size as well as + the IMAP, DMAP, IZONE and DZONE areas. + +* the IMAP area is comprised of multiple blocks which contain a + bitmap for inode allocation; it maintains the allocated/free state + for all inodes in the IZONE area + +* the DMAP area is comprised of multiple blocks which contain a + bitmap for data blocks; it maintains the allocated/free state for + all blocks in the DZONE area + + +.. slide:: Simple filesystem example + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + :--no-separation: + + +--------------+--------+--------+---------+---------+ + | | | | | | + | Superblock | IMAP | DMAP | IZONE | DZONE | + | | | | | | + +--------------+--------+--------+---------+---------+ + + +Filesystem Operations +===================== + +The following diagram shows a high level overview of how the file +system drivers interact with the rest of the file system "stack". 
In +order to support multiple filesystem types and instances Linux +implements a large and complex subsystem that deals with filesystem +management. This is called Virtual File System (or sometimes Virtual +File Switch) and it is abbreviated as VFS. + + +.. slide:: Overview + :inline-contents: True + :level: 2 + + .. ditaa:: + + ^ ^ ^ + | stat | open | read + v v v + +------------------------------------------------------------+ + | | + | Virtual Filesystem Switch | + | | + +------------------------------------------------------------+ + ^ ^ + | | + v v + +-------------+ +-------------+ + | Filesystem | | Filesystem | + | driver | | driver | + +-------------+ +-------------+ + ^ ^ + | | + v v + +------------------------------------------------------------+ + | | + | Block I/O layer | + | | + +------------------------------------------------------------+ + +VFS translates the complex file management related system calls to +simpler operations that are implemented by the device drivers. These +are some of the operations that a file system must implement: + +.. slide:: Filesystem Operations + :inline-contents: True + :level: 2 + + * Mount + + * Open a file + + * Querying file attributes + + * Reading data from a file + + * Writing data to a file + + * Creating a file + + * Deleting a file + + +The next sections will look in depth at some of these operations. + +Mounting a filesystem +--------------------- + +A summary of a typical implementation is presented below: + +.. slide:: Mounting a filesystem + :inline-contents: True + :level: 2 + + * Input: a storage device (partition) + + * Output: dentry pointing to the root directory + + * Steps: check device, determine filesystem parameters, locate the root inode + + * Example: check magic, determine block size, read the root inode and create dentry + + +Opening a file +-------------- + +A summary of a typical implementation is presented below: + +.. 
slide:: Opening a file + :inline-contents: True + :level: 2 + + * Input: path + + * Output: file descriptor + + * Steps: + + * Determine the filesystem type + + * For each name in the path: lookup parent dentry, load inode, + load data, find dentry + + * Create a new *file* that points to the last *dentry* + + * Find a free entry in the file descriptor table and set it to *file* + + +Querying file attributes +------------------------ + +A summary of a typical implementation is presented below: + +.. slide:: Querying file attributes + :inline-contents: True + :level: 2 + + * Input: path + + * Output: file attributes + + * Steps: + + * Access `file->dentry->inode` + + * Read file attributes from the *inode* + +Reading data from a file +------------------------ + +A summary of a typical implementation is presented below: + +.. slide:: Reading data from a file + :inline-contents: True + :level: 2 + + * Input: file descriptor, offset, length + + * Output: data + + * Steps: + + * Access `file->dentry->inode` + + * Determine data blocks + + * Copy data blocks to memory + + +Writing data to a file +---------------------- + +A summary of a typical implementation is presented below: + +.. slide:: Writing data to a file + :inline-contents: True + :level: 2 + + * Input: file descriptor, offset, length, data + + * Output: + + * Steps: + + * Allocate one or more data blocks + + * Add the allocated blocks to the inode and update file size + + * Copy data from userspace to internal buffers and write them to + storage + + +Closing a file +-------------- + +A summary of a typical implementation is presented below: + +.. slide:: Closing a file + :inline-contents: True + :level: 2 + + * Input: file descriptor + + * Output: + + * Steps: + + * set the file descriptor entry to NULL + + * Decrement file reference counter + + * When the counter reaches 0 free *file* + + +Directories +----------- + +.. 
slide:: Directories + :inline-contents: True + :level: 2 + + Directories are special files which contain one or more dentries. + +Creating a file +--------------- + +A summary of a typical implementation is presented below: + +.. slide:: Creating a file + :inline-contents: True + :level: 2 + + * Input: path + + * Output: + + * Steps: + + * Determine the directory inode + + * Read data blocks and find space for a new dentry + + * Write back the modified directory inode data blocks + + +Deleting a file +--------------- + +A summary of a typical implementation is presented below: + + +.. slide:: Deleting a file + :inline-contents: True + :level: 2 + + * Input: path + + * Output: + + * Steps: + + * determine the parent inode + + * read parent inode data blocks + + * find and erase the dentry (check for links) + + * when last file is closed: deallocate data and inode blocks + + +Linux Virtual File System +========================= + +Although the main purpose for the original introduction of VFS in UNIX +kernels was to support multiple filesystem types and instances, a side +effect was that it simplified filesystem device driver development +since common parts are now implemented in the VFS. Almost all of the +caching and buffer management is dealt with by VFS, leaving just +efficient data storage management to the filesystem device driver. + +In order to deal with multiple filesystem types, VFS introduced the +common filesystem abstractions previously presented. Note that the +filesystem driver can also use its own particular filesystem +abstractions in memory (e.g. ext4 inode or dentry) and that there +might be a different abstraction on storage as well. Thus we may end +up with three slightly different filesystem abstractions: one for +VFS - always in memory, and two for a particular filesystem - one in +memory used by the filesystem driver, and one on storage. + +.. slide:: Virtual File System + :level: 2 + :inline-contents: True + + .. 
ditaa:: + :height: 100% + + + ^ ^ ^ + | stat | open | read + v v v + +------------------------------------------------------------+ + | Virtual File System | + | | + | | + | /-------\ /--------\ /--------\ | + | | inode |<----------+ dentry |<----------+ FILE | | + | \---+---/ \----+---/ \---+----/ | + | | | | | + | | | | | + | v v v | + | +-------+ +--------+ +-------+ | + | | inode | | dentry | | page | | + | | cache | | cache | | cache | | + | +-------+ +--------+ +-------+ | + | | + +------------------------------------------------------------+ + ^ ^ + | | + v v + +-------------+ +-------------+ + | Filesystem | | Filesystem | + | driver | | driver | + +-------------+ +-------------+ + + +Superblock Operations +--------------------- + +VFS requires that all filesystems implement a set of "superblock +operations". + +They deal with initializing, updating and freeing the VFS superblock: + + * :c:func:`fill_super` - reads the filesystem statistics (e.g. total + number of inodes, free number of inodes, total number of blocks, free + number of blocks) + + * :c:func:`write_super` - updates the superblock information on storage + (e.g. updating the number of free inodes or data blocks) + + * :c:func:`put_super` - frees any data associated with the filesystem + instance, called when unmounting a filesystem + +The next class of operations deals with manipulating filesystem +inodes. These operations will receive VFS inodes as parameters but the +filesystem driver may use its own inode structures internally and, if +so, they will convert in between them as necessary. + +A summary of the superblock operations is presented below: + +.. slide:: Superblock Operations + :level: 2 + :inline-contents: True + + .. 
hlist:: + :columns: 2 + + * fill_super + * put_super + * write_super + * read_inode + * write_inode + * evict_inode + * statfs + * remount_fs + + +Inode Operations +---------------- + +The next set of operations that VFS calls when interacting with +filesystem device drivers are the "inode operations". Non-intuitively +these mostly deal with manipulating dentries - looking up a file name, +creating, linking and removing files, dealing with symbolic links, +creating and removing directories. + +This is the list of the most important inode operations: + +.. slide:: Inode Operations + :level: 2 + :inline-contents: True + + .. hlist:: + :columns: 2 + + * create + * lookup + * link + * unlink + * symlink + * mkdir + * rmdir + * rename + * readlink + * follow_link + * put_link + * ... + + +The Inode Cache +--------------- + +The inode cache is used to avoid reading and writing inodes to and +from storage every time we need to read or update them. The cache uses +a hash table and inodes are indexed with a hash function which takes +as parameters the superblock (of a particular filesystem instance) and +the inode number associated with an inode. + +inodes are cached until either the filesystem is unmounted, the inode +deleted or the system enters a memory pressure state. When this +happens the Linux memory management system will (among other things) +free inodes from the inode cache based on how often they were +accessed. + +.. slide:: The Inode Cache + :level: 2 + :inline-contents: True + + * Caches inodes into memory to avoid costly storage operations + + * An inode is cached until low memory conditions are triggered + + * inodes are indexed with a hash table + + * The inode hash function takes the superblock and inode number as + inputs + + +The Dentry Cache +---------------- + +.. 
slide:: The Dentry Cache + :level: 2 + :inline-contents: True + + * State: + + * Used – *d_inode* is valid and the *dentry* object is in use + + * Unused – *d_inode* is valid but the dentry object is not in use + + * Negative – *d_inode* is not valid; the inode was not yet loaded + or the file was erased + + * Dentry cache + + * List of used dentries (dentry->d_state == used) + + * List of the most recent used dentries (sorted by access time) + + * Hash table to avoid searching the tree + +The Page Cache +-------------- + +.. slide:: The Page Cache + :level: 2 + :inline-contents: True + + * Caches file data and not block device data + + * Uses the :c:type:`struct address_space` to translate file offsets + to block offsets + + * Used for both `read` / `write` and `mmap` + + * Uses a radix tree + + + +.. slide:: struct address_space + :level: 2 + :inline-contents: True + + .. code-block:: c + + /** + * struct address_space - Contents of a cacheable, mappable object. + * @host: Owner, either the inode or the block_device. + * @i_pages: Cached pages. + * @gfp_mask: Memory allocation flags to use for allocating pages. + * @i_mmap_writable: Number of VM_SHARED mappings. + * @nr_thps: Number of THPs in the pagecache (non-shmem only). + * @i_mmap: Tree of private and shared mappings. + * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. + * @nrpages: Number of page entries, protected by the i_pages lock. + * @nrexceptional: Shadow or DAX entries, protected by the i_pages lock. + * @writeback_index: Writeback starts here. + * @a_ops: Methods. + * @flags: Error bits and flags (AS_*). + * @wb_err: The most recent error which has occurred. + * @private_lock: For use by the owner of the address_space. + * @private_list: For use by the owner of the address_space. + * @private_data: For use by the owner of the address_space. 
+ */ + struct address_space { + struct inode *host; + struct xarray i_pages; + gfp_t gfp_mask; + atomic_t i_mmap_writable; + #ifdef CONFIG_READ_ONLY_THP_FOR_FS + /* number of thp, only for non-shmem files */ + atomic_t nr_thps; + #endif + struct rb_root_cached i_mmap; + struct rw_semaphore i_mmap_rwsem; + unsigned long nrpages; + unsigned long nrexceptional; + pgoff_t writeback_index; + const struct address_space_operations *a_ops; + unsigned long flags; + errseq_t wb_err; + spinlock_t private_lock; + struct list_head private_list; + void *private_data; + } __attribute__((aligned(sizeof(long)))) __randomize_layout; + + struct address_space_operations { + int (*writepage)(struct page *page, struct writeback_control *wbc); + int (*readpage)(struct file *, struct page *); + + /* Write back some dirty pages from this mapping. */ + int (*writepages)(struct address_space *, struct writeback_control *); + + /* Set a page dirty. Return true if this dirtied it */ + int (*set_page_dirty)(struct page *page); + + /* + * Reads in the requested pages. Unlike ->readpage(), this is + * PURELY used for read-ahead!. + */ + int (*readpages)(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages); + void (*readahead)(struct readahead_control *); + + int (*write_begin)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); + int (*write_end)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); + + /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ + sector_t (*bmap)(struct address_space *, sector_t); + void (*invalidatepage) (struct page *, unsigned int, unsigned int); + int (*releasepage) (struct page *, gfp_t); + void (*freepage)(struct page *); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); + /* + * migrate the contents of a page to the specified target. 
If + * migrate_mode is MIGRATE_ASYNC, it must not block. + */ + int (*migratepage) (struct address_space *, + struct page *, struct page *, enum migrate_mode); + bool (*isolate_page)(struct page *, isolate_mode_t); + void (*putback_page)(struct page *); + int (*launder_page) (struct page *); + int (*is_partially_uptodate) (struct page *, unsigned long, + unsigned long); + void (*is_dirty_writeback) (struct page *, bool *, bool *); + int (*error_remove_page)(struct address_space *, struct page *); + + /* swapfile support */ + int (*swap_activate)(struct swap_info_struct *sis, struct file *file, + sector_t *span); + void (*swap_deactivate)(struct file *file); + }; + + +.. slide:: Reading data + :level: 2 + :inline-contents: True + + .. code-block:: c + + /** + * generic_file_read_iter - generic filesystem read routine + * @iocb: kernel I/O control block + * @iter: destination for the data read + * + * This is the "read_iter()" routine for all filesystems + * that can use the page cache directly. + * + * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall + * be returned when no data can be read without waiting for I/O requests + * to complete; it doesn't prevent readahead. + * + * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O + * requests shall be made for the read or for readahead. When no data + * can be read, -EAGAIN shall be returned. When readahead would be + * triggered, a partial, possibly empty read shall be returned. + * + * Return: + * * number of bytes copied, even for partial reads + * * negative error code (or 0 if IOCB_NOIO) if nothing was read + */ + ssize_t + generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) + + /* + * Generic "read page" function for block devices that have the normal + * get_block functionality. This is most of the block device filesystems. 
+ * Reads the page asynchronously --- the unlock_buffer() and + * set/clear_buffer_uptodate() functions propagate buffer state into the + * page struct once IO has completed. + */ + int block_read_full_page(struct page *page, get_block_t *get_block) + diff --git a/Documentation/teaching/lectures/interrupts.rst b/Documentation/teaching/lectures/interrupts.rst new file mode 100644 index 00000000000000..ecb43bbd4d0ede --- /dev/null +++ b/Documentation/teaching/lectures/interrupts.rst @@ -0,0 +1,976 @@ +========== +Interrupts +========== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives +================== + +.. slide:: Interrupts + :inline-contents: True + :level: 2 + + * Interrupts and exceptions (x86) + + * Interrupts and exceptions (Linux) + + * Deferrable work + + * Timers + +What is an interrupt? +===================== + +An interrupt is an event that alters the normal execution flow of a +program and can be generated by hardware devices or even by the CPU +itself. When an interrupt occurs the current flow of execution is +suspended and the interrupt handler runs. After the interrupt handler runs +the previous execution flow is resumed. + +Interrupts can be grouped into two categories based on the source of +the interrupt. They can also be grouped into two other categories based +on the ability to postpone or temporarily disable the interrupt: + +.. slide:: Interrupts + :inline-contents: True + :level: 2 + + * **synchronous**, generated by executing an instruction + + * **asynchronous**, generated by an external event + + * **maskable** + + * can be ignored + + * signaled via INT pin + + * **non-maskable** + + * cannot be ignored + + * signaled via NMI pin + +Synchronous interrupts, usually named exceptions, handle conditions detected by the +processor itself in the course of executing an instruction. Divide by zero or +a system call are examples of exceptions. 
+ +Asynchronous interrupts, usually named interrupts, are external events generated +by I/O devices. For example a network card generates an interrupt to signal +that a packet has arrived. + +Most interrupts are maskable, which means we can temporarily postpone +running the interrupt handler when we disable the interrupt until the +time the interrupt is re-enabled. However, there are a few critical +interrupts that cannot be disabled/postponed. + +Exceptions +---------- + +There are two sources for exceptions: + +.. slide:: Exceptions + :inline-contents: True + :level: 2 + + * processor detected + + - **faults** + + - **traps** + + - **aborts** + + * programmed + + - **int n** + +Processor detected exceptions are raised when an abnormal condition is +detected while executing an instruction. + +A fault is a type of exception that is reported before the execution of the +instruction and can usually be corrected. The saved EIP is the address of +the instruction that caused the fault, so after the fault is corrected +the program can re-execute the faulting instruction (e.g. a page fault). + +A trap is a type of exception that is reported after the execution of the +instruction in which the exception was detected. The saved EIP is the address +of the instruction after the instruction that caused the trap (e.g. a debug trap). + +Quiz: interrupt terminology +--------------------------- + +.. slide:: Quiz: interrupt terminology + :inline-contents: True + :level: 2 + + For each of the following terms on the left select all the terms + from the right that best describe them. + + .. hlist:: + :columns: 2 + + * Watchdog + * Demand paging + * Division by zero + * Timer + * System call + * Breakpoint + + * Exception + * Interrupt + * Maskable + * Nonmaskable + * Trap + * Fault + + + +Hardware Concepts +================= + +Programmable Interrupt Controller +--------------------------------- + +.. 
slide:: Programmable Interrupt Controller + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + +-----------+ NMI + | | + | |<----------+ + | | + | | +------------+ + | | | | IRQ0 + | | | |<------------+ device0 + | CPU | | | IRQ1 + | | INTR | PIC |<------------+ device1 + | |<----------+ | IRQN + | | | |<------------+ deviceN + | | | | + +-----------+ +------------+ + +A device supporting interrupts has an output pin used for signaling an Interrupt ReQuest. IRQ +pins are connected to a device named Programmable Interrupt Controller (PIC) which is connected +to the CPU's INTR pin. + +A PIC usually has a set of ports used to exchange information with the CPU. When a device +connected to one of the PIC's IRQ lines needs CPU attention the following flow happens: + + * device raises an interrupt on the corresponding IRQn pin + * PIC converts the IRQ into a vector number and writes it to a port for CPU to read + * PIC raises an interrupt on CPU INTR pin + * PIC waits for CPU to acknowledge an interrupt before raising another interrupt + * CPU acknowledges the interrupt then it starts handling the interrupt + +We will see later how the CPU handles the interrupt. Notice that by +design the PIC won't raise another interrupt until the CPU has acknowledged +the current interrupt. + +.. note:: + + Once the interrupt is acknowledged by the CPU the interrupt + controller can request another interrupt, regardless of whether the CPU + has finished handling the previous interrupt or not. Thus, depending on + how the OS controls the CPU it is possible to have nested + interrupts. + +The interrupt controller allows each IRQ line to be individually +disabled. This allows simplifying design by making sure that interrupt +handlers are always executed serially. + +Interrupt controllers in SMP systems +------------------------------------ + +In SMP systems we may have multiple interrupt controllers in the +system. 
+ +For example, on the x86 architecture each core has a local APIC used +to process interrupts from locally connected devices like timers or +thermal sensors. Then there is an I/O APIC that is used to distribute IRQs +from external devices to CPU cores. + +.. slide:: Interrupt controllers in SMP systems + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + + CPU0 CPU1 + +-------------+ +-------------+ + | | | | + | |local IRQs | |local IRQs + | +---------- | +---------- + | | | | + | local APIC | | local APIC | + | | LINT0, LINT1 | | LINT0, LINT1 + | +------------- | +------------- + | | | | + +-------+-----+ +------+------+ + | | + | | + | | + +-------+--------------------------------+------+ + | | + | Interrupt Controller Communication BUS | + +----------------------+------------------------+ + | + | + +--------+--------+ + | | + | I/O APIC | + | | + +--------+--------+ + | + | + | + External interrupts + + + +Interrupt Control +----------------- + +In order to synchronize access to shared data between the interrupt handler +and other potential concurrent activities such as driver initialization or +driver data processing, it is often required to enable and disable interrupts in +a controlled fashion. + +This can be accomplished at several levels: + +.. slide:: Enabling/disabling the interrupts + :inline-contents: True + :level: 2 + + * at the device level + + * by programming the device control registers + + * at the PIC level + + * PIC can be programmed to disable a given IRQ line + + * at the CPU level; for example, on x86 one can use the following + instructions: + + * cli (CLear Interrupt flag) + * sti (SeT Interrupt flag) + + +Interrupt priorities +--------------------- + +Most architectures also support interrupt priorities. When this is +enabled, it permits interrupt nesting only for those interrupts that +have a higher priority than the current priority level. + +.. slide:: Interrupt priorities + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + Process + context + | + v + IRQ10 | irq10 handler + -----------------------------> +-------------+ + | + IRQ20 (lower priority) | + -----------------------------> pending v + | + IRQ5 (higher priority) | irq5 handler + -----------------------------> +-------->---------+ + | + v + | + +--------<---------+ + | + v + | + -------<-------+ + irq20 handler + Pending IRQ20 ------->-------+ + | + v + | + +--------------+ + | + v + + +.. note:: + + Not all architectures support interrupt priorities. It is also + difficult to support defining a generic scheme for interrupt + priorities for general use OSes and some kernels (Linux included) + do not use interrupt priorities. On the other hand most RTOS use + interrupt priorities since they are typically used in more + constraint use-cases where it is easier to define interrupt + priorities. + + +Quiz: hardware concepts +----------------------- + +.. slide:: Quiz: hardware concepts + :inline-contents: True + :level: 2 + + Which of the following statements are true? + + * The CPU can start processing a new interrupt before the current + one is finished + + * Interrupts can be disabled at the device level + + * Lower priority interrupts can not preempt handlers for higher + priority interrupts + + * Interrupts can be disabled at the interrupt controller level + + * On SMP systems the same interrupt can be routed to different CPUs + + * Interrupts can be disabled at the CPU level + + +Interrupt handling on the x86 architecture +========================================== + +This section will examine how interrupts are handled by the CPU on the +x86 architecture. + +Interrupt Descriptor Table +-------------------------- + +The interrupt descriptor table (IDT) associates each interrupt or exception +identifier with a descriptor for the instructions that service the associated +event. We will name the identifier as vector number and the associated +instructions as interrupt/exception handler. 
+ +An IDT has the following characteristics: + +.. slide:: Interrupt Descriptor Table + :inline-contents: True + :level: 2 + + * it is used as a jump table by the CPU when a given vector is triggered + * it is an array of 256 x 8 bytes entries + * may reside anywhere in physical memory + * processor locates IDT by the means of IDTR + +Below we can find Linux IRQ vector layout. The first 32 entries are reserved +for exceptions, vector 128 is used for syscall interface and the rest are +used mostly for hardware interrupts handlers. + +.. slide:: Linux IRQ vector layout + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + arch/x86/include/asm/irq_vectors.h + +------+ + | 0 | 0..31, system traps and exceptions + +------+ + | 1 | + +------+ + | | + +------+ + | | + | | + | | + +------+ + | 32 | 32..127, device interrupts + +------+ + | | + | | + | | + +------+ + | 128 | int80 syscall interface + +------+ + | 129 | 129..255, other interrupts + +------+ + | | + | | + | | + +------+ + | 255 | + +------+ + +On x86 an IDT entry has 8 bytes and it is named gate. There can be 3 types of gates: + + * interrupt gate, holds the address of an interrupt or exception handler. + Jumping to the handler disables maskable interrupts (IF flag is cleared). + * trap gates, similar to an interrupt gate but it does not disable maskable + interrupts while jumping to interrupt/exception handler. + * task gates (not used in Linux) + +Let's have a look at several fields of an IDT entry: + + * segment selector, index into GDT/LDT to find the start of the code segment where + the interrupt handlers reside + * offset, offset inside the code segment + * T, represents the type of gate + * DPL, minimum privilege required for using the segments content. + +.. slide:: Interrupt descriptor table entry (gate) + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + 63 47 42 32 + +------------------------------+---+---+----+---+---------------+ + | | | D | | | | + | offset (16..31 | P | P | | T | | + | | | L | | | | + +------------------------------+---+---+----+---+---------------+ + | | | + | segment selector | offset (0..15) | + | | | + +------------------------------+--------------------------------+ + 31 15 0 + + +Interrupt handler address +------------------------- + +In order to find the interrupt handler address we first need to find the start +address of the code segment where interrupt handler resides. For this we +use the segment selector to index into GDT/LDT where we can find the corresponding +segment descriptor. This will provide the start address kept in the 'base' field. +Using base address and the offset we can now go to the start of the interrupt handler. + + +.. slide:: Interrupt handler address + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + + Interrupt Descriptor + +----------------------------------------------+ + | | + | +------------------+ +--------+ +------+ | + | | segment selector | | offset| | PL | | + | +----+-------------+ +---+----+ +------+ | + | | | | + +----------------------------------------------+ + | | + | | + +-------------+ +----------------------------> +---------------+ + | ^ | ISR address | + | Segment Descriptor | +---------------+ + | +----------------------------------------------+ | + | | | | + +---->| +------------------+ +--------+ +------+ | | + | | base | | limit | | PL | | | + | +---------+--------+ +--------+ +------+ | | + | | | | + +----------------------------------------------+ | + | | + +--------------------------------------------+ + + +Stack of interrupt handler +-------------------------- + +Similar to control transfer to a normal function, a control transfer +to an interrupt or exception handler uses the stack to store the +information needed for returning to the interrupted code. 
+ +As can be seen in the figure below, an interrupt pushes the EFLAGS register +before saving the address of the interrupted instruction. Certain types +of exceptions also cause an error code to be pushed on the stack to help +debug the exception. + + +.. slide:: Interrupt handler stack + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + + w/o privilege transition w/ privilege transition + + + +---------------------+ +---------------------+ + | | | | | + | | | OLD SS:ESP | OLD SS | NEW SS:ESP from TSS + | +---------------------+ +---------------------+ + | | | | | + | | OLD EFLAGS | | OLD ESP | + | +---------------------+ +---------------------+ + | | | | | + | | OLD CS | | OLD EFLAGS | + | +---------------------+ +---------------------+ + | | | | | + | | OLD EIP | | OLD CS | + | +---------------------+ +---------------------+ + | | | | | + | | (error code) | NEW SS:ESP | OLD EIP | + | +---------------------+ +---------------------+ + | | | | | + | | | | (error code) | NEW SS:ESP + | | | +---------------------+ + | | | | | + | | | | | + | | | | | + | | | | | + | | | | | + | | | | | + | | | | | + v +---------------------+ +---------------------+ + + +Handling an interrupt request +----------------------------- + +After an interrupt request has been generated the processor runs a sequence of +events that eventually end up with running the kernel interrupt handler: + + +.. 
slide:: Handling an interrupt request + :inline-contents: True + :level: 2 + + + * CPU checks the current privilege level + * if the privilege level needs to change + + * switch to the stack associated with the new privilege level + * save old stack information on the new stack + + * save EFLAGS, CS, EIP on stack + * save error code on stack in case of an abort + * execute the kernel interrupt handler + +Returning from an interrupt handler +----------------------------------- + +Most architectures offer special instructions to clean up the stack and resume +the execution after the interrupt handler has been executed. On x86 IRET is used +to return from an interrupt handler. IRET is similar to RET except that IRET +increments ESP by an extra four bytes (because of the flags on stack) and moves the +saved flags into the EFLAGS register. + +To resume the execution after an interrupt the following sequence is used (x86): + +.. slide:: Returning from an interrupt + :inline-contents: True + :level: 2 + + * pop the error code (in case of an abort) + * execute IRET + + * pops values from the stack and restores the following registers: CS, EIP, EFLAGS + * if the privilege level changed, returns to the old stack and old privilege level + +Inspecting the x86 interrupt handling +------------------------------------- + +.. slide:: Inspecting the x86 interrupt handling + :inline-contents: True + :level: 2 + + |_| + + .. asciicast:: ../res/intr_x86.cast + + +Quiz: x86 interrupt handling +---------------------------- + +.. slide:: Quiz: x86 interrupt handling + :inline-contents: True + :level: 2 + + The following gdb commands are used to determine the handler for + the int80 based system call exception. Select and arrange the + commands or output of the commands in the correct order. + + .. 
code-block:: gdb + + (void *) 0xc15de780 + + set $idtr_addr=($idtr_entry>>48<<16)|($idtr_entry&0xffff) + + print (void*)$idtr_addr + + set $idtr = 0xff800000 + + (void *) 0xc15de874 + + set $idtr = 0xff801000 + + set $idtr_entry = *(uint64_t*)($idtr + 8 * 128) + + monitor info registers + +Interrupt handling in Linux +=========================== + +In Linux the interrupt handling is done in three phases: critical, immediate and +deferred. + +In the first phase the kernel will run the generic interrupt handler that +determines the interrupt number, the interrupt handler for this particular +interrupt and the interrupt controller. At this point any timing-critical +actions will also be performed (e.g. acknowledge the interrupt at the interrupt +controller level). Local processor interrupts are disabled for the duration of +this phase and continue to be disabled in the next phase. + +In the second phase, all of the device driver's handlers associated with this +interrupt will be executed. At the end of this phase, the interrupt controller's +"end of interrupt" method is called to allow the interrupt controller to +reassert this interrupt. The local processor interrupts are enabled at this +point. + +.. note:: + + It is possible that one interrupt is associated with multiple + devices and in this case it is said that the interrupt is + shared. Usually, when using shared interrupts it is the + responsibility of the device driver to determine if the interrupt + is targeted at its device or not. + +Finally, in the last phase of interrupt handling, interrupt context deferrable +actions will be run. These are also sometimes known as the "bottom half" of the +interrupt (the upper half being the part of the interrupt handling that runs +with interrupts disabled). At this point, interrupts are enabled on the local +processor. + +.. slide:: Interrupt handling in Linux + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + + phase 1 + +----------------+ + | critical | phase 2 + +----------------+ +-----------------+ + | | | immediate | phase 3 + | - IRQ disabled | +-----------------+ +----------------+ + | - ACK IRQ +-----+ | | | deferred | + | | +---> - IRQ disabled | +----------------+ + +----------------+ | - device handler| | | + | - EOI IRQ +-----+ | - IRQ enabled | + +-----------------+ +----> - execute later| + | | + +----------------+ + + +Nested interrupts and exceptions +-------------------------------- + +Linux used to support nested interrupts but this was removed some time +ago in order to avoid increasingly complex solutions to stack +overflows issues - allow just one level of nesting, allow multiple +levels of nesting up to a certain kernel stack depth, etc. + +However, it is still possible to have nesting between exceptions and +interrupts but the rules are fairly restrictive: + +.. slide:: IRQ and exception nesting in Linux + :inline-contents: True + :level: 2 + + * an exception (e.g. page fault, system call) can not preempt an interrupt; + if that occurs it is considered a bug + + * an interrupt can preempt an exception + + * an interrupt can not preempt another interrupt (it used to be possible) + + +The diagram below shows the possible nesting scenarios: + +.. slide:: Interrupt/Exception nesting + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + + ^ + ^ + | | | | + | Syscall | IRQi| | + User Mode | Exception (e.g. page fault) | | | + | | | | + +------------------------------------+-----+-----------------+ + | iret| | iret^ IRQj| iret| + | | | | | | + Kernel Mode v-------+ ^-------+ ^------+ v-----+ v-----+ + | | | | + IRQi| iret| IRQj| iret| + v------+ v------+ + +Interrupt context +----------------- + +While an interrupt is handled (from the time the CPU jumps to the interrupt +handler until the interrupt handler returns - e.g. IRET is issued) it is said +that code runs in "interrupt context". 
+ +Code that runs in interrupt context has the following properties: + +.. slide:: Interrupt context + :inline-contents: True + :level: 2 + + * it runs as a result of an IRQ (not of an exception) + * there is no well-defined process context associated with it + * not allowed to trigger a context switch (no sleep, schedule, or user memory access) + +Deferrable actions +------------------ + +Deferrable actions are used to run callback functions at a later time. If +a deferrable action is scheduled from an interrupt handler, the associated callback +function will run after the interrupt handler has completed. + +There are two large categories of deferrable actions: those that run in +interrupt context and those that run in process context. + +The purpose of interrupt context deferrable actions is to avoid doing too much +work in the interrupt handler function. Running for too long with interrupts +disabled can have undesired effects such as increased latency or poor system +performance due to missing other interrupts (e.g. dropping network packets +because the CPU did not react in time to dequeue packets from the network +interface and the network card buffer is full). + +Deferrable actions have APIs to: **initialize** an instance, **activate** or +**schedule** the action and **mask/disable** and **unmask/enable** the execution +of the callback function. The latter is used for synchronization purposes between +the callback function and other contexts. + +Typically the device driver will initialize the deferrable action +structure during the device instance initialization and will activate +/ schedule the deferrable action from the interrupt handler. + +.. 
slide:: Deferrable actions + :inline-contents: False + :level: 2 + + + * Schedule callback functions to run at a later time + + * Interrupt context deferrable actions + + * Process context deferrable actions + + * APIs for initialization, scheduling, and masking + +Soft IRQs +--------- + +Soft IRQs is the term used for the low-level mechanism that implements deferring +work from interrupt handlers but that still runs in interrupt context. + +.. slide:: Soft IRQs + :inline-contents: True + :level: 2 + + Soft IRQ APIs: + + * initialize: :c:func:`open_softirq` + * activation: :c:func:`raise_softirq` + * masking: :c:func:`local_bh_disable`, :c:func:`local_bh_enable` + + Once activated, the callback function :c:func:`do_softirq` runs either: + + * after an interrupt handler or + * from the ksoftirqd kernel thread + + +Since softirqs can reschedule themselves or other interrupts can occur that +reschedule them, they can potentially lead to (temporary) process starvation if +checks are not put into place. Currently, the Linux kernel does not allow +running soft irqs for more than :c:macro:`MAX_SOFTIRQ_TIME` or rescheduling for +more than :c:macro:`MAX_SOFTIRQ_RESTART` consecutive times. + +Once these limits are reached a special kernel thread, **ksoftirqd**, is woken up +and all of the remaining pending soft irqs will be run from the context of this +kernel thread. + +.. slide:: ksoftirqd + :inline-contents: False + :level: 2 + + * minimum priority kernel thread + * runs softirqs after certain limits are reached + * tries to achieve good latency and avoid process starvation + +Soft irq usage is restricted; they are used by a handful of subsystems that have +low latency requirements and high frequency: + +.. slide:: Types of soft IRQs + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high + frequency threaded job scheduling. For almost all the purposes + tasklets are more than enough. F.e. 
all serial device BHs et + al. should be converted to tasklets, not to softirqs. + */ + + enum + { + HI_SOFTIRQ=0, + TIMER_SOFTIRQ, + NET_TX_SOFTIRQ, + NET_RX_SOFTIRQ, + BLOCK_SOFTIRQ, + IRQ_POLL_SOFTIRQ, + TASKLET_SOFTIRQ, + SCHED_SOFTIRQ, + HRTIMER_SOFTIRQ, + RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ + + NR_SOFTIRQS + }; + + +Packet flood example +--------------------- + +The following screencast will look at what happens when we flood the +system with a large number of packets. Since at least a part of the +packet processing is happening in softirq we should expect the CPU to +spend most of the time running softirqs but the majority of that +should be in the context of the `ksoftirqd` thread. + +.. slide:: Packet flood example + :inline-contents: True + :level: 2 + + |_| + + .. asciicast:: ../res/ksoftirqd-packet-flood.cast + + +Tasklets +-------- + +.. slide:: Tasklets + :inline-contents: True + :level: 2 + + Tasklets are a dynamic type (not limited to a fixed number) of + deferred work running in interrupt context. + + Tasklets API: + + * initialization: :c:func:`tasklet_init` + * activation: :c:func:`tasklet_schedule` + * masking: :c:func:`tasklet_disable`, :c:func:`tasklet_enable` + + Tasklets are implemented on top of two dedicated softirqs: + :c:macro:`TASKLET_SOFTIRQ` and :c:macro:`HI_SOFTIRQ` + + Tasklets are also serialized, i.e. the same tasklet can only execute on one processor at a time. + + +Workqueues +---------- + + .. slide:: Workqueues + :inline-contents: True + :level: 2 + + Workqueues are a type of deferred work that runs in process context. + + They are implemented on top of kernel threads. + + Workqueues API: + + * init: :c:macro:`INIT_WORK` + * activation: :c:func:`schedule_work` + +Timers +------ + +.. 
slide:: Timers + :inline-contents: True + :level: 2 + + Timers are implemented on top of the :c:macro:`TIMER_SOFTIRQ` + + Timer API: + + * initialization: :c:func:`setup_timer` + * activation: :c:func:`mod_timer` + +Deferrable actions summary +-------------------------- + +Here is a cheat sheet which summarizes Linux deferrable actions: + + +.. slide:: Deferrable actions summary + :inline-contents: True + :level: 2 + + * softIRQ + + * runs in interrupt context + * statically allocated + * same handler may run in parallel on multiple cores + + * tasklet + + * runs in interrupt context + * can be dynamically allocated + * same handler runs are serialized + + * workqueues + + * run in process context + +Quiz: Linux interrupt handling +------------------------------ + +.. slide:: Quiz: Linux interrupt handling + :inline-contents: True + :level: 2 + + Which of the following phases of interrupt handling runs with + interrupts disabled at the CPU level? + + * Critical + + * Immediate + + * Deferred diff --git a/Documentation/teaching/lectures/intro.rst b/Documentation/teaching/lectures/intro.rst new file mode 100644 index 00000000000000..7d336d39ffd17a --- /dev/null +++ b/Documentation/teaching/lectures/intro.rst @@ -0,0 +1,1166 @@ +============ +Introduction +============ + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. slide:: Introduction + :inline-contents: True + :level: 2 + + * Basic operating systems terms and concepts + + * Overview of the Linux kernel + + +Basic operating systems terms and concepts +========================================== + +User vs Kernel +-------------- + +.. slide:: User vs Kernel + :level: 2 + + * Execution modes + + * Kernel mode + + * User mode + + * Memory protection + + * Kernel-space + + * User-space + + +Kernel and user are two terms that are often used in operating +systems. 
Their definition is pretty straightforward: The kernel is +the part of the operating system that runs with higher privileges +while user (space) usually means applications running with low +privileges. + +However, these terms are heavily overloaded and might have very +specific meanings in some contexts. + +User mode and kernel mode are terms that may refer specifically to the +processor execution mode. Code that runs in kernel mode can fully +[#hypervisor]_ control the CPU while code that runs in user mode has +certain limitations. For example, local CPU interrupts can only be +disabled or enabled while running in kernel mode. If such an operation +is attempted while running in user mode an exception will be generated +and the kernel will take over to handle it. + +.. [#hypervisor] some processors may have even higher privileges than + kernel mode, e.g. a hypervisor mode, that is only + accessible to code running in a hypervisor (virtual + machine monitor) + +User space and kernel space may refer specifically to memory +protection or to virtual address spaces associated with either the +kernel or user applications. + +Grossly simplifying, the kernel space is the memory area that is +reserved to the kernel while user space is the memory area reserved to +a particular user process. The kernel space is access protected so +that user applications can not access it directly, while user space +can be directly accessed from code running in kernel mode. + + +Typical operating system architecture +------------------------------------- + +In the typical operating system architecture (see the figure below) +the operating system kernel is responsible for accessing and sharing the +hardware in a secure and fair manner with multiple applications. + +.. slide:: Typical operating system architecture + :level: 2 + :inline-contents: True + + .. ditaa:: + + +---------------+ +--------------+ +---------------+ -\ + | Application 1 | | Application2 | ... 
| Application n | | + +---------------+ +--------------+ +---------------+ |> User space + | | | | + v v v -/ + +--------------------------------------------------------+ -\ + | System Call Interface | | + +--------------------------------------------------------+ | + | | | | + v v v |> Kernel space + +--------------------------------------------------------+ | + | Kernel | | + +--------------------------------------------------------+ | + | Device drivers | | + +--------------------------------------------------------+ -/ + | | | -\ + v v v |> Hardware + -/ + + + +The kernel offers a set of APIs that applications issue which are +generally referred to as "System Calls". These APIs are different from +regular library APIs because they are the boundary at which the +execution mode switch from user mode to kernel mode. + +In order to provide application compatibility, system calls are rarely +changed. Linux particularly enforces this (as opposed to in kernel +APIs that can change as needed). + +The kernel code itself can be logically separated in core kernel +code and device drivers code. Device drivers code is responsible of +accessing particular devices while the core kernel code is +generic. The core kernel can be further divided into multiple logical +subsystems (e.g. file access, networking, process management, etc.) + + +Monolithic kernel +----------------- + +A monolithic kernel is one where there is no access protection between +the various kernel subsystems and where public functions can be +directly called between various subsystems. + + +.. slide:: Monolithic kernel + :level: 2 + :inline-contents: True + + .. 
ditaa:: + + +-----+ +-----+ +-----+ + | App | | App | | App | + +-----+ +-----+ +-----+ + | | | User + =--|-------=--------|--------=-------|-------------------=- + | | | Kernel + v v v + +--------------------------------------------------------+ + | System Call Interface | + +--------------------------------------------------------+ + | | + v v + +-----+ +-----+ + | |<---------------------------->| | Kernel + | |<---+ +------->| | functions + +--+--+ | | +-----+ + | | | ^ + | | +-----+ | | + |+------+---->| |<---+ | + || | +-----+ | + || | | + vv | v + +--++-+ | +-----+ + | | +------------------------>| | Device + | |<---------------------------->| | Drivers + +--+--+ +--+--+ + | | + v v + +--------------------------------------------------------+ + | Hardware | + +--------------------------------------------------------+ + + +However, most monolithic kernels do enforce a logical separation +between subsystems especially between the core kernel and device +drivers with relatively strict APIs (but not necessarily fixed in +stone) that must be used to access services offered by one subsystem +or device drivers. This, of course, depends on the particular kernel +implementation and the kernel's architecture. + + +Micro kernel +------------ + +A micro-kernel is one where large parts of the kernel are protected +from each-other, usually running as services in user space. Because +significant parts of the kernel are now running in user mode, the +remaining code that runs in kernel mode is significantly smaller, hence +micro-kernel term. + +.. slide:: Micro-kernel + :level: 2 + :inline-contents: True + + .. 
ditaa:: + + +-----+ +--------+ +---------+ +---------+ + | App | | File | | Network | | Display |<--+ + | | | Server | | Server | | Server |-+ | + +-----+ +--------+ +---------+ +---------+ | | + | ^ | | User + -|-|----------------------------------------=-|-|-------=- + | | | | Kernel + | | | | + | | | | + | | | | + | | Reply +----------------------------+ | | + | +--------| |----+ | + +--------->| Micro kernel |------+ + Request | (IPC, Memory, Scheduler) | + | | + +----------------------------+ + | + v + +--------------------------------------------------------+ + | Hardware | + +--------------------------------------------------------+ + + +In a micro-kernel architecture the kernel contains just enough code +that allows for message passing between different running +processes. Practically that means implement the scheduler and an IPC +mechanism in the kernel, as well as basic memory management to setup +the protection between applications and services. + +One of the advantages of this architecture is that the services are +isolated and hence bugs in one service won't impact other services. + +As such, if a service crashes we can just restart it without affecting +the whole system. However, in practice this is difficult to achieve +since restarting a service may affect all applications that depend on +that service (e.g. if the file server crashes all applications with +opened file descriptors would encounter errors when accessing them). + +This architecture imposes a modular approach to the kernel and offers +memory protection between services but at a cost of performance. What +is a simple function call between two services on monolithic kernels +now requires going through IPC and scheduling which will incur a +performance penalty [#minix-vs-linux]_. + +.. 
[#minix-vs-linux] https://lwn.net/Articles/220255/ + + +Micro-kernels vs monolithic kernels +----------------------------------- + +Advocates of micro-kernels often suggest that micro-kernels are +superior because of the modular design a micro-kernel +enforces. However, monolithic kernels can also be modular and there +are several approaches that modern monolithic kernels use toward this +goal: + +.. slide:: Monolithic kernels *can* be modular + :level: 2 + :inline-contents: True + + * Components can be enabled or disabled at compile time + + * Support of loadable kernel modules (at runtime) + + * Organize the kernel in logical, independent subsystems + + * Strict interfaces but with low performance overhead: macros, + inline functions, function pointers + + +There is a class of operating systems that (used to) claim to be +hybrid kernels, in between monolithic and micro-kernels (e.g. Windows, +Mac OS X). However, since all of the typical monolithic services run +in kernel-mode in these operating systems, there is little merit to +qualify them as anything other than monolithic kernels. + +.. slide:: "Hybrid" kernels + :level: 2 + :inline-contents: True + + Many operating systems and kernel experts have dismissed the label + as meaningless, and just marketing. Linus Torvalds said of this + issue: + + "As to the whole 'hybrid kernel' thing - it's just marketing. It's + 'oh, those microkernels had good PR, how can we try to get good PR + for our working kernel? Oh, I know, let's use a cool name and try + to imply that it has all the PR advantages that that other system + has'." + + +Address space +------------- + +.. slide:: Address space + :level: 2 + + * Physical address space + + * RAM and peripheral memory + + * Virtual address space + + * How the CPU sees the memory (when in protected / paging mode) + + * Process address space + + * Kernel address space + + +The address space term is an overloaded term that can have different +meanings in different contexts. 
+ +The physical address space refers to the way the RAM and device +memories are visible on the memory bus. For example, on the 32-bit Intel +architecture, it is common to have the RAM mapped into the lower +physical address space while the graphics card memory is mapped high +in the physical address space. + +The virtual address space (or sometimes just address space) refers to +the way the CPU sees the memory when the virtual memory module is +activated (sometimes called protected mode or paging enabled). The +kernel is responsible for setting up a mapping that creates a virtual +address space in which areas of this space are mapped to certain +physical memory areas. + +Related to the virtual address space there are two other terms that +are often used: process (address) space and kernel (address) space. + +The process space is (part of) the virtual address space associated +with a process. It is the "memory view" of processes. It is a +contiguous area that starts at zero. Where the process's address space +ends depends on the implementation and architecture. + +The kernel space is the "memory view" of the code that runs in kernel +mode. + + +User and kernel sharing the virtual address space +------------------------------------------------- + +A typical implementation for user and kernel spaces is one where the +virtual address space is shared between user processes and the kernel. + +In this case kernel space is located at the top of the address space, +while user space is at the bottom. In order to prevent the user processes +from accessing kernel space, the kernel creates mappings that prevent +access to the kernel space from user mode. + +.. slide:: User and kernel sharing the virtual address space + :level: 2 + :inline-contents: True + + .. 
ditaa:: + + +-------------------+ ^ + 0xFFFFFFFF | | | + | | | Kernel space + | | | + +-------------------+ v + 0xC0000000 | | ^ + | | | User space + | | | + | | | + | | | + | | | + | | | + | | | + | | | + 0x00000000 +-------------------+ v + + 32bit Virtual Address Space + +Execution contexts +------------------ + +.. slide:: Execution contexts + :level: 2 + + * Process context + + * Code that runs in user mode, part of a process + + * Code that runs in kernel mode, as a result of a system call + issued by a process + + * Interrupt context + + * Code that runs as a result of an interrupt + + * Always runs in kernel mode + + +One of the most important jobs of the kernel is to service interrupts +and to service them efficiently. This is so important that a special +execution context is associated with it. + +The kernel executes in interrupt context when it runs as a result of +an interrupt. This includes the interrupt handler, but it is not +limited to it, there are other special (software) constructs that run +in interrupt mode. + +Code running in interrupt context always runs in kernel mode and there +are certain limitations that the kernel programmer has to be aware of +(e.g. not calling blocking functions or accessing user space). + +Opposed to interrupt context there is process context. Code that runs +in process context can do so in user mode (executing application code) +or in kernel mode (executing a system call). + + +Multi-tasking +------------- + +.. slide:: Multi-tasking + :level: 2 + + * An OS that supports the "simultaneous" execution of multiple processes + + * Implemented by fast switching between running processes to allow + the user to interact with each program + + * Implementation: + + * Cooperative + + * Preemptive + +Multitasking is the ability of the operating system to +"simultaneously" execute multiple programs. It does so by quickly +switching between running processes. 
+ +Cooperative multitasking requires the programs to cooperate to achieve +multitasking. A program will run and relinquish CPU control back +to the OS, which will then schedule another program. + +With preemptive multitasking the kernel will enforce strict limits for +each process, so that all processes have a fair chance of +running. Each process is allowed to run a time slice (e.g. 100ms) +after which, if it is still running, it is forcefully preempted and +another task is scheduled. + +Preemptive kernel +----------------- + +.. slide:: Preemptive kernel + :level: 2 + :inline-contents: True + + Preemptive multitasking and preemptive kernels are different terms. + + A kernel is preemptive if a process can be preempted while running + in kernel mode. + + However, note that non-preemptive kernels may support preemptive + multitasking. + + +Pageable kernel memory +---------------------- + +.. slide:: Pageable kernel memory + :level: 2 + :inline-contents: True + + A kernel supports pageable kernel memory if parts of kernel memory + (code, data, stack or dynamically allocated memory) can be swapped + to disk. + +Kernel stack +------------ + +.. slide:: Kernel stack + :level: 2 + :inline-contents: True + + Each process has a kernel stack that is used to maintain the + function call chain and local variables state while it is executing + in kernel mode, as a result of a system call. + + The kernel stack is small (4KB - 12 KB) so the kernel developer has + to avoid allocating large structures on stack or recursive calls + that are not properly bounded. + +Portability +----------- + +In order to increase portability across various architectures and +hardware configurations, modern kernels are organized as follows at the +top level: + +.. 
slide:: Portability + :level: 2 + :inline-contents: True + + * Architecture and machine specific code (C & ASM) + + * Independent architecture code (C): + + * kernel core (further split in multiple subsystems) + + * device drivers + +This makes it easier to reuse code as much as possible between +different architectures and machine configurations. + + +Asymmetric MultiProcessing (ASMP) +--------------------------------- + +Asymmetric MultiProcessing (ASMP) is a way of supporting multiple +processors (cores) by a kernel, where a processor is dedicated to the +kernel and all other processors run user space programs. + +The disadvantage of this approach is that the kernel throughput +(e.g. system calls, interrupt handling, etc.) does not scale with the +number of processors and hence typical processes frequently use system +calls. The scalability of the approach is limited to very specific +systems (e.g. scientific applications). + + +.. slide:: Asymmetric MultiProcessing (ASMP) + :level: 2 + :inline-contents: True + + .. ditaa:: + + +-----------+ + | | + +------------------>| Memory |<-----------------+ + | | | | + | +-----------+ | + | ^ | + | | | + v v v + +--------------+ +---------------+ +---------------+ + | | | | | | + | Processor A | | Processor B | | Processor C | + | | | | | | + | | | +-----------+ | | +-----------+ | + | | | | Process 1 | | | | Process 1 | | + | | | +-----------+ | | +-----------+ | + | | | | | | + | +----------+ | | +-----------+ | | +-----------+ | + | | kernel | | | | Process 2 | | | | Process 2 | | + | +----------+ | | +-----------+ | | +-----------+ | + | | | | | | + | | | +-----------+ | | +-----------+ | + | | | | Process 3 | | | | Process 3 | | + | | | +-----------+ | | +-----------+ | + +--------------+ +---------------+ +---------------+ + + +Symmetric MultiProcessing (SMP) +------------------------------- + +As opposed to ASMP, in SMP mode the kernel can run on any of the +existing processors, just as user processes. 
This approach is more +difficult to implement, because it creates race conditions in the +kernel if two processes run kernel functions that access the same +memory locations. + +In order to support SMP the kernel must implement synchronization +primitives (e.g. spin locks) to guarantee that only one processor is +executing a critical section. + +.. slide:: Symmetric MultiProcessing (SMP) + :level: 2 + :inline-contents: True + + .. ditaa:: + + +-----------+ + | | + +------------------->| Memory |<------------------+ + | | | | + | +-----------+ | + | ^ | + | | | + v v v + +---------------+ +---------------+ +---------------+ + | | | | | | + | Processor A | | Processor B | | Processor C | + | | | | | | + | +-----------+ | | +-----------+ | | +-----------+ | + | | Process 1 | | | | Process 1 | | | | Process 1 | | + | +-----------+ | | +-----------+ | | +-----------+ | + | | | | | | + | +-----------+ | | +-----------+ | | +-----------+ | + | | Process 2 | | | | Process 2 | | | | Process 2 | | + | +-----------+ | | +-----------+ | | +-----------+ | + | | | | | | + | +-----------+ | | +-----------+ | | +-----------+ | + | | kernel | | | | kernel | | | | kernel | | + | +-----------+ | | +-----------+ | | +-----------+ | + +---------------+ +---------------+ +---------------+ + + +CPU Scalability +--------------- + +CPU scalability refers to how well the performance scales with +the number of cores. There are a few things that the kernel developer +should keep in mind with regard to CPU scalability: + +.. slide:: CPU Scalability + :level: 2 + :inline-contents: True + + * Use lock free algorithms when possible + + * Use fine grained locking for high contention areas + + * Pay attention to algorithm complexity + + +Overview of the Linux kernel +============================ + + +Linux development model +----------------------- + +.. 
slide:: Linux development model + :level: 2 + + * Open source, GPLv2 License + + * Contributors: companies, academia and independent developers + + * Development cycle: 3 - 4 months which consists of a 1 - 2 week + merge window followed by bug fixing + + * Features are only allowed in the merge window + + * After the merge window a release candidate is done on a weekly + basis (rc1, rc2, etc.) + +The Linux kernel is one of the largest open source projects in the world +with thousands of developers contributing code and millions of lines of +code changed for each release. + +It is distributed under the GPLv2 license, which, simply put, +requires that any modification of the kernel done on software that is +shipped to customers should be made available to them (the customers), +although in practice most companies make the source code publicly +available. + +There are many companies (often competing) that contribute code to the +Linux kernel as well as people from academia and independent +developers. + +The current development model is based on doing releases at fixed +intervals of time (usually 3 - 4 months). New features are merged into +the kernel during a one or two week merge window. After the merge +window, a release candidate is done on a weekly basis (rc1, rc2, etc.) + + +Maintainer hierarchy +-------------------- + +In order to scale the development process, Linux uses a hierarchical +maintainership model: + +..
slide:: Maintainer hierarchy + :level: 2 + :inline-contents: True + + * Linus Torvalds is the maintainer of the Linux kernel and merges pull + requests from subsystem maintainers + + * Each subsystem has one or more maintainers that accept patches or + pull requests from developers or device driver maintainers + + * Each maintainer has its own git tree, e.g.: + + * Linus Torvalds: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + + * David Miller (networking): git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git/ + + * Each subsystem may maintain a -next tree where developers can submit + patches for the next merge window + +Since the merge window is only a maximum of two weeks, most of the +maintainers have a -next tree where they accept new features from +developers or maintainers downstream even when the merge window +is closed. + +Note that bug fixes are accepted even outside the merge window in the +maintainer's tree from where they are periodically pulled by the +upstream maintainer, for every release candidate. + + + +Linux source code layout +------------------------- + +.. slide:: Linux source code layout + :level: 2 + :inline-contents: True + + ..
ditaa:: + + +-------+ + | linux | + +-+-----+ + | + +------+--------+---------+---------+--------------+--------------+ + | | | | | | | + | v v v v v v + | +------+ +-------+ +-------+ +--------+ +---------------+ +---------+ + | | arch | | block | | certs | | crypto | | Documentation | | drivers | + | +------+ +-------+ +-------+ +--------+ +---------------+ +---------+ + | + +-------+----------+--------+---------+--------+--------+---------+ + | | | | | | | | + | v v v v v v v + | +----------+ +----+ +---------+ +------+ +-----+ +--------+ +-----+ + | | firmware | | fs | | include | | init | | ipc | | kernel | | lib | + | +----------+ +----+ +---------+ +------+ +-----+ +--------+ +-----+ + | + +-----+------+---------+------------+------------+------------+ + | | | | | | | + | v v v v v v + | +----+ +-----+ +---------+ +---------+ +----------+ +-------+ + | | mm | | net | | samples | | scripts | | security | | sound | + | +----+ +-----+ +---------+ +---------+ +----------+ +-------+ + | + +------+--------+--------+ + | | | + v v v + +-------+ +-----+ +------+ + | tools | | usr | | virt | + +-------+ +-----+ +------+ + + +These are the top level of the Linux source code folders: + +* arch - contains architecture specific code; each architecture is + implemented in a specific sub-folder (e.g. 
arm, arm64, x86) + +* block - contains the block subsystem code that deals with reading + and writing data from block devices: creating block I/O requests, + scheduling them (there are several I/O schedulers available), + merging requests, and passing them down through the I/O stack to the + block device drivers + +* certs - implements support for signature checking using certificates + +* crypto - software implementation of various cryptography algorithms + as well as a framework that allows offloading such algorithms in + hardware + +* Documentation - documentation for various subsystems, Linux kernel + command line options, description for sysfs files and format, device + tree bindings (supported device tree nodes and format) + +* drivers - drivers for various devices as well as the Linux driver + model implementation (an abstraction that describes drivers, devices, + buses and the way they are connected) + +* firmware - binary or hex firmware files that are used by various + device drivers + +* fs - home of the Virtual Filesystem Switch (generic filesystem code) + and of various filesystem drivers + +* include - header files + +* init - the generic (as opposed to architecture specific) + initialization code that runs during boot + +* ipc - implementation for various Inter Process Communication system + calls such as message queues, semaphores, shared memory + +* kernel - process management code (including support for kernel + threads, workqueues), scheduler, tracing, time management, generic + irq code, locking + +* lib - various generic functions such as sorting, checksums, + compression and decompression, bitmap manipulation, etc. + +* mm - memory management code, for both physical and virtual memory, + including the page, SL*B and CMA allocators, swapping, virtual memory + mapping, process address space manipulation, etc.
+ +* net - implementation for various network stacks including IPv4 and + IPv6; BSD socket implementation, routing, filtering, packet + scheduling, bridging, etc. + +* samples - various driver samples + +* scripts - parts of the build system, scripts used for building modules, + kconfig (the Linux kernel configurator), as well as various other + scripts (e.g. checkpatch.pl that checks if a patch conforms to + the Linux kernel coding style) + +* security - home of the Linux Security Module framework that allows + extending the default (Unix) security model as well as + implementation for multiple such extensions such as SELinux, smack, + apparmor, tomoyo, etc. + +* sound - home of ALSA (Advanced Linux Sound Architecture) as well as the + old Linux sound framework (OSS) + +* tools - various user space tools for testing or interacting with + Linux kernel subsystems + +* usr - support for embedding an initrd file in the kernel image + +* virt - home of the KVM (Kernel-based Virtual Machine) hypervisor + + +Linux kernel architecture +------------------------- + +.. slide:: Linux kernel architecture + :level: 2 + :inline-contents: True + + .. ditaa:: + :height: 100% + + +---------------+ +--------------+ +---------------+ + | Application 1 | | Application2 | ...
| Application n | + +---------------+ +--------------+ +---------------+ + | | | + v v v + +--------------------------------------------------------+ + | Kernel | + | | + | +----------------------+ +-------------------+ | + | | Process Management | | Memory Management | | + | +----------------------+ +-------------------+ | + | | + | +------------+ +------------+ +------------+ | + | | Block I/O | | VFS | | Networking | | + | +------------+ +------------+ +------------+ | + | | + | +------------+ +------------+ +------------+ | + | | IPC | | Security | | Crypto | | + | +------------+ +------------+ +------------+ | + | | + | +------------+ +------------+ +------------+ | + | | DRM | | ALSA | | USB | | + | +------------+ +------------+ +------------+ | + | ... | + +--------------------------------------+-----------------+ + | Device drivers | arch | + | | | + | +----+ +-----+ +--------+ +----+ | +----------+ | + | |char| |block| |ethernet| |wifi| | | machine 1| | + | +----+ +-----+ +--------+ +----+ | +----------+ | + | +----------+ +-----+ +----+ +---+ | +----------+ | + | |filesystem| |input| |iio | |usb| | | machine 2| | + | +----------+ +-----+ +----+ +---+ | +----------+ | + | +-----------+ +----------+ +---+ | | + | |framebuffer| | platform | |drm| | ... | + | +-----------+ +----------+ +---+ | | + +-------------------------+----+-------+-----------------+ + | | | + v v v + + +--------------------------------------------------------+ + | Hardware | + +--------------------------------------------------------+ + + +arch +.... + +.. 
slide:: arch + :level: 2 + :inline-contents: True + + * Architecture specific code + + * May be further sub-divided in machine specific code + + * Interfacing with the boot loader and architecture specific + initialization + + * Access to various hardware bits that are architecture or machine + specific such as interrupt controllers, SMP controllers, BUS + controllers, exceptions and interrupt setup, virtual memory handling + + * Architecture optimized functions (e.g. memcpy, string operations, + etc.) + +This part of the Linux kernel contains architecture specific code and +may be further sub-divided in machine specific code for certain +architectures (e.g. arm). + +"Linux was first developed for 32-bit x86-based PCs (386 or +higher). These days it also runs on (at least) the Compaq Alpha AXP, +Sun SPARC and UltraSPARC, Motorola 68000, PowerPC, PowerPC64, ARM, +Hitachi SuperH, IBM S/390, MIPS, HP PA-RISC, Intel IA-64, DEC VAX, AMD +x86-64 and CRIS architectures." + +It implements access to various hardware bits that are architecture or +machine specific such as interrupt controllers, SMP controllers, BUS +controllers, exceptions and interrupt setup, virtual memory handling. + +It also implements architecture optimized functions (e.g. memcpy, +string operations, etc.) + + +Device drivers +.............. + +.. slide:: Device drivers + :level: 2 + + * Unified device model + + * Each subsystem has its own specific driver interfaces + + * Many device driver types (TTY, serial, SCSI, filesystem, ethernet, + USB, framebuffer, input, sound, etc.) + +The Linux kernel uses a unified device model whose purpose is to +maintain internal data structures that reflect the state and structure +of the system. Such information includes what devices are present, +what is their status, what bus they are attached to, to what driver +they are attached, etc. This information is essential for implementing +system wide power management, as well as device discovery and dynamic +device removal.
+ +Each subsystem has its own specific driver interface that is tailored +to the devices it represents in order to make it easier to write +correct drivers and to reduce code duplication. + +Linux supports one of the most diverse sets of device driver types; +some examples are: TTY, serial, SCSI, filesystem, ethernet, USB, +framebuffer, input, sound, etc. + + +Process management +.................. + +.. slide:: Process management + :level: 2 + + * Unix basic process management and POSIX threads support + + * Processes and threads are abstracted as tasks + + * Operating system level virtualization + + * Namespaces + + * Control groups + +Linux implements the standard Unix process management APIs such as +fork(), exec(), wait(), as well as standard POSIX threads. + +However, Linux processes and threads are implemented quite +differently than in other kernels. There are no internal structures +implementing processes or threads; instead there is a :c:type:`struct +task_struct` that describes an abstract scheduling unit called task. + +A task has pointers to resources, such as address space, file +descriptors, IPC ids, etc. The resource pointers for tasks that are +part of the same process point to the same resources, while the resource +pointers of tasks of different processes will point to different resources. + +This peculiarity, together with the `clone()` and `unshare()` system +calls, allows for implementing new features such as namespaces. + +Namespaces are used together with control groups (cgroup) to implement +operating system virtualization in Linux. + +cgroup is a mechanism to organize processes hierarchically and +distribute system resources along the hierarchy in a controlled and +configurable manner. + + +Memory management +................. + +Linux memory management is a complex subsystem that deals with: + +..
slide:: Memory management + :level: 2 + :inline-contents: True + + * Management of the physical memory: allocating and freeing memory + + * Management of the virtual memory: paging, swapping, demand + paging, copy on write + + * User services: user address space management (e.g. mmap(), brk(), + shared memory) + + * Kernel services: SL*B allocators, vmalloc + + + +Block I/O management +.................... + +The Linux Block I/O subsystem deals with reading and writing data from +or to block devices: creating block I/O requests, transforming block I/O +requests (e.g. for software RAID or LVM), merging and sorting the +requests and scheduling them via various I/O schedulers to the block +device drivers. + +.. slide:: Block I/O management + :level: 2 + :inline-contents: True + + .. ditaa:: + :height: 100% + + +---------------------------------+ + | Virtual Filesystem Switch | + +---------------------------------+ + ^ + | + v + +---------------------------------+ + | Device Mapper | + +---------------------------------+ + ^ + | + v + +---------------------------------+ + | Generic Block Layer | + +---------------------------------+ + ^ + | + v + +--------------------------------+ + | I/O scheduler | + +--------------------------------+ + ^ ^ + | | + v v + +--------------+ +--------------+ + | Block device | | Block device | + | driver | | driver | + +--------------+ +--------------+ + + +Virtual Filesystem Switch +......................... + +The Linux Virtual Filesystem Switch implements common / generic +filesystem code to reduce duplication in filesystem drivers. It +introduces certain filesystem abstractions such as: + +* inode - describes the file on disk (attributes, location of data + blocks on disk) + +* dentry - links an inode to a name + +* file - describes the properties of an opened file (e.g. file + pointer) + +* superblock - describes the properties of a formatted filesystem + (e.g. 
number of blocks, block size, location of root directory on + disk, encryption, etc.) + +.. slide:: Virtual Filesystem Switch + :level: 2 + :inline-contents: True + + .. ditaa:: + :height: 100% + + + ^ ^ ^ + | stat | open | read + v v v + +------------------------------------------------------------+ + | Virtual Filesystem Switch | + | | + | | + | /-------\ /--------\ /--------\ | + | | inode |<----------+ dentry |<----------+ FILE | | + | \---+---/ \----+---/ \---+----/ | + | | | | | + | | | | | + | v v v | + | +-------+ +--------+ +-------+ | + | | inode | | dentry | | page | | + | | cache | | cache | | cache | | + | +-------+ +--------+ +-------+ | + | | + +------------------------------------------------------------+ + ^ ^ + | | + v v + +-------------+ +-------------+ + | Filesystem | | Filesystem | + | driver | | driver | + +-------------+ +-------------+ + + +The Linux VFS also implements a complex caching mechanism which +includes the following: + +* the inode cache - caches the file attributes and internal file + metadata + +* the dentry cache - caches the directory hierarchy of a filesystem + +* the page cache - caches file data blocks in memory + + + +Networking stack +................ + +.. slide:: Networking stack + :level: 2 + :inline-contents: True + + .. 
ditaa:: + :height: 100% + + +---------------------------+ + | Berkeley Socket Interface | + +---------------------------+ + + +---------------------------+ + | Transport layer | + +-------------+-------------+ + | TCP | UDP | + +-------------+-------------+ + + +---------------------------+ + | Network layer | + +-----+---------+-----------+ + | IP | Routing | NetFilter | + +-----+---------+-----------+ + + +---------------------------+ + | Data link layer | + +-------+-------+-----------+ + | ETH | ARP | BRIDGING | + +-------+-------+-----------+ + + +---------------------------+ + | Queuing discipline | + +---------------------------+ + + +---------------------------+ + | Network device drivers | + +---------------------------+ + +Linux Security Modules +...................... + +.. slide:: Linux Security Modules + :level: 2 + :inline-contents: True + + * Hooks to extend the default Linux security model + + * Used by several Linux security extensions: + + * Security Enhanced Linux + + * AppArmor + + * Tomoyo + + * Smack diff --git a/Documentation/teaching/lectures/memory-management.rst b/Documentation/teaching/lectures/memory-management.rst new file mode 100644 index 00000000000000..b6401eec9ef0fb --- /dev/null +++ b/Documentation/teaching/lectures/memory-management.rst @@ -0,0 +1,484 @@ +================= +Memory Management +================= + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. slide:: Memory Management + :inline-contents: True + :level: 2 + + * Physical Memory Management + + * Page allocations + + * Small allocations + + * Virtual Memory Management + + * Page Fault Handling Overview + + +Physical Memory Management +========================== + +..
slide:: Physical Memory Management + :inline-contents: True + :level: 2 + + * Algorithms and data structures that keep track of physical memory + pages + + * Independent of virtual memory management + + * Both virtual and physical memory management are required for complete + memory management + + * Physical pages are tracked using a special data structure: + :c:type:`struct page` + + * All physical pages have an entry reserved in the :c:data:`mem_map` + vector + + * The physical page status may include: a counter for how many + times a page is used, position in swap or file, buffers for this + page, position in the page cache, etc. + +Memory zones +------------ + +.. slide:: Memory zones + :inline-contents: True + :level: 2 + + * DMA zone + + * DMA32 zone + + * Normal zone (LowMem) + + * HighMem Zone + + * Movable Zone + + +Non-Uniform Memory Access +------------------------- + +.. slide:: Non-Uniform Memory Access + :inline-contents: True + :level: 2 + + * Physical memory is split between multiple nodes, one for each CPU + + * There is a single physical address space accessible from every node + + * Access to the local memory is faster + + * Each node maintains its own memory zones (e.g. DMA, NORMAL, HIGHMEM, etc.) + + +Page allocation +--------------- + +.. slide:: Page allocation + :inline-contents: True + :level: 2 + + + .. code-block:: c + + /* Allocates 2^order contiguous pages and returns a pointer to the + * descriptor for the first page + */ + struct page *alloc_pages(gfp_mask, order); + + /* allocates a single page */ + struct page *alloc_page(gfp_mask); + + + /* helper functions that return the kernel virtual address */ + void *__get_free_pages(gfp_mask, order); + void *__get_free_page(gfp_mask); + void *__get_zero_page(gfp_mask); + void *__get_dma_pages(gfp_mask, order); + + +.. slide:: Why only allocate pages in chunks of power of 2?
+ :inline-contents: True + :level: 2 + + * Typical memory allocation algorithms have linear complexity + + * Why not use paging? + + * Sometimes we do need contiguous memory allocations (for DMA) + + * Allocation would require page table changes and TLB flushes + + * Not able to use extended pages + + * Some architectures directly (in hardware) linearly map a part + of the address space (e.g. MIPS) + + +.. slide:: The buddy algorithm + :inline-contents: True + :level: 2 + + * Free blocks are distributed in multiple lists + + * Each list contains blocks of the same size + + * The block size is a power of two + + +.. slide:: Allocating a block of size N + :inline-contents: True + :level: 2 + + * If there is a free block in the N-size list, pick the first + + * If not, look for a free block in the 2N-size list + + * Split the 2N-size block in two N-size blocks and add them to the + N-size list + + * Now that we have the N-size list populated, pick the first free + block from that list + + +.. slide:: Freeing a block of size N + :inline-contents: True + :level: 2 + + * If the "buddy" is free, coalesce into a 2N-size block + + * Repeat until no free buddy block is found and place the + resulting block in the respective list + + +.. slide:: The Linux implementation + :inline-contents: True + :level: 2 + + * 11 lists for blocks of 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, + 1024 pages + + * Each memory zone has its own buddy allocator + + * Each zone has a vector of descriptors for free blocks, one entry + for each size + + * The descriptor contains the number of free blocks and the head of + the list + + * Blocks are linked in the list using the `lru` field of + :c:type:`struct page` + + * Free pages have the PG_buddy flag set + + * The page descriptor keeps a copy of the block size in the private + field to easily check if the "buddy" is free + + +Small allocations +----------------- + +..
slide:: Small allocations + :inline-contents: True + :level: 2 + + * Buddy is used to allocate pages + + * Many of the kernel subsystems need to allocate buffers smaller + than a page + + * Typical solution: variable size buffer allocation + + * Leads to external fragmentation + + * Alternative solution: fixed size buffer allocation + + * Leads to internal fragmentation + + * Compromise: fixed size block allocation with multiple sizes, geometrically distributed + + * e.g.: 32, 64, ..., 131072 + + +.. slide:: The SLAB allocator + :inline-contents: True + :level: 2 + + * Buffers = objects + + * Uses buddy to allocate a pool of pages for object allocations + + * Each object (optionally) has a constructor and destructor + + * Deallocated objects are cached - avoids subsequent calls for + constructors and buddy allocation / deallocation + +.. slide:: Why SLAB? + :inline-contents: True + :level: 2 + + * The kernel will typically allocate and deallocate multiple times + the same data structures over time (e.g. :c:type:`struct + task_struct`) effectively using fixed size allocations. Using the + SLAB reduces the frequency of the heavier + allocation/deallocation operations. + + * For variable size buffers (which occur less frequently) a + geometric distribution of caches with fixed-size can be used + + * Reduces the memory allocation foot-print since we are searching a + much smaller memory area, compared to buddy which can span over a + larger area + + * Employs cache optimization techniques (slab coloring) + + +.. slide:: Slab architecture + :inline-contents: True + :level: 2 + + .. image:: ../res/slab-overview.png + + +.. slide:: Cache descriptors + :inline-contents: True + :level: 2 + + * A name to identify the cache for stats + + * object constructor and destructor functions + + * size of the objects + + * Flags + + * Size of the slab in power of 2 pages + + * GFP masks + + * One or more slabs, grouped by state: full, partially full, empty + +..
slide:: SLAB descriptors + :inline-contents: True + :level: 2 + + * Number of objects + + * Memory region where the objects are stored + + * Pointer to the first free object + + * Descriptors are stored either in + + * the SLAB itself (if the object size is lower than 512 or if + internal fragmentation leaves enough space for the SLAB + descriptor) + + * in generic caches internally used by the SLAB allocator + + +.. slide:: Slab detailed architecture + :inline-contents: True + :level: 2 + + .. image:: ../res/slab-detailed-arch.png + + +.. slide:: Generic vs specific caches + :inline-contents: True + :level: 2 + + * Generic caches are used internally by the slab allocator + + * allocating memory for cache and slab descriptors + + * They are also used to implement :c:func:`kmalloc` by implementing + 20 caches with object sizes geometrically distributed between + 32 bytes and 4MB + + * Specific caches are created on demand by kernel subsystems + + +.. slide:: Object descriptors + :inline-contents: True + :level: 2 + + .. image:: ../res/slab-object-descriptors.png + +.. slide:: Object descriptors + :inline-contents: True + :level: 2 + + * Only used for free objects + + * An integer that points to the next free object + + * The last free object uses a terminator value + + * Internal descriptors - stored in the slab + + * External descriptors - stored in generic caches + + +.. slide:: SLAB coloring + :inline-contents: True + :level: 2 + + .. image:: ../res/slab-coloring.png + + +Virtual memory management +========================= + +..
slide:: Virtual memory management + :inline-contents: True + :level: 2 + + * Used in both kernel and user space + + * Using virtual memory requires: + + * reserving (allocating) a segment in the *virtual* address space + (be it kernel or user) + + * allocating one or more physical pages for the buffer + + * allocating one or more physical pages for page tables and + internal structures + + * mapping the virtual memory segment to the physical allocated + pages + +.. slide:: Address space descriptors + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + :--no-separation: + + +------------------+ +------------+ + | Address space | | |-------------->+------------+ + | descriptor | +------------+ | | + +------------------+ | | Page +------------+ + | +------------+ tables | | + +------------------+--------------+ | ... | +------------+ + | | +------------+ | ... | + v v | |-------+ +------------+ + +------------+ +------------+ +------------+ | | | + | Area | | Area | | +------------+ + | descriptor | | descriptor | | + +------------+ +------------+ | + | | + +-------------+------------------+ +------>+------------+ + | | | | + v v +------------+ + +------------+ +------------+ | | + | Area | | Area | +------------+ + | descriptor | | descriptor | | ... | + +------------+ +------------+ +------------+ + | | | + +-----------+-----------+ +------------+ + | | + v v + +------------+ +------------+ + | Area | | Area | + | descriptor | | descriptor | + +------------+ +------------+ + + +.. slide:: Address space descriptors + :inline-contents: True + :level: 2 + + * Page table is used either by: + + * The CPU's MMU + + * The kernel to handle TLB exception (some RISC processors) + + * The address space descriptor is used by the kernel to maintain + high level information such as file and file offset (for mmap + with files), read-only segment, copy-on-write segment, etc. + + +.. 
slide:: Allocating virtual memory + :inline-contents: True + :level: 2 + + * Search a free area in the address space descriptor + + * Allocate memory for a new area descriptor + + * Insert the new area descriptor in the address space descriptor + + * Allocate physical memory for one or more page tables + + * Setup the page tables for the newly allocated area in the virtual + address space + + * Allocating (on demand) physical pages and map them in the virtual + address space by updating the page tables + + +.. slide:: Freeing virtual memory + :inline-contents: True + :level: 2 + + * Removing the area descriptor + + * Freeing the area descriptor memory + + * Updating the page tables to remove the area from the virtual + address space + + * Flushing the TLB for the freed virtual memory area + + * Freeing physical memory of the page tables associated with the + freed area + + * Freeing physical memory of the freed virtual memory area + + +.. slide:: Linux virtual memory management + :inline-contents: True + :level: 2 + + * Kernel + + * vmalloc + + * area descriptor: :c:type:`struct vm_struct` + + * address space descriptor: simple linked list of :c:type:`struct vm_struct` + + * Userspace + + * area descriptor: :c:type:`struct vm_area_struct` + + * address space descriptor: :c:type:`struct mm_struct`, red-black tree + + +Fault page handling +=================== + +.. slide:: Linux virtual memory management + :inline-contents: True + :level: 2 + + .. image:: ../res/page-fault-handling.png diff --git a/Documentation/teaching/lectures/networking.rst b/Documentation/teaching/lectures/networking.rst new file mode 100644 index 00000000000000..8813d0caa3bd52 --- /dev/null +++ b/Documentation/teaching/lectures/networking.rst @@ -0,0 +1,594 @@ +================== +Network Management +================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. 
slide:: Network Management + :inline-contents: True + :level: 2 + + * Socket implementation + + * Routing implementation + + * Network Device Interface + + * Hardware and Software Acceleration Techniques + + +Network Management Overview +=========================== + +.. slide:: Network Management Overview + :inline-contents: True + :level: 2 + + .. ditaa:: + :height: 100% + + +---------------------------+ + | Berkeley Socket Interface | + +---------------------------+ + + +---------------------------+ + | Transport layer | + +-------------+-------------+ + | TCP | UDP | + +-------------+-------------+ + + +---------------------------+ + | Network layer | + +-----+---------+-----------+ + | IP | Routing | NetFilter | + +-----+---------+-----------+ + + +---------------------------+ + | Data link layer | + +-------+-------+-----------+ + | ETH | ARP | BRIDGING | + +-------+-------+-----------+ + + +---------------------------+ + | Queuing discipline | + +---------------------------+ + + +---------------------------+ + | Network device drivers | + +---------------------------+ + + +Sockets Implementation Overview +=============================== + +.. slide:: Sockets Implementation Overview + :inline-contents: True + :level: 2 + + .. ditaa:: + :height: 100% + + Socket + File + +------+ Operations + | FILE | ----------------------> +-----------+ + +------+ | read | + | | struct socket_alloc +-----------+ + | | +---------------+ | write | + | +------->| struct socket | +-----------+ + | f_private| +-----------+ | | select | + | | | ... | | +-----------+ + | | +-----------+ | | ... | + | +---------------+ +-----------+ + +--------->| struct inode | + f_inode | +-----------+ | + | | ... | | + | +-----------+ | + +---------------+ + + +Sockets Families and Protocols +=============================== + +.. slide:: Sockets Families and Protocols + :inline-contents: True + :level: 2 + + .. 
ditaa:: + :height: 100% + + + + struct socket +---------> struct proto_ops + +--------------------+ | +-----------------+ + | struct socket | | | release | + | | | +-----------------+ + +--------------------+ | | bind | + | struct proto_ops * |--------+ +-----------------+ + +--------------------+ | connect | + | ... | +-----------------+ + +---------------+ | accept | + +---------| struct sock * |-------+ +-----------------+ + | +---------------+ | | sendmsg | + | | +-----------------+ + | | | recvmsg | + | | +-----------------+ + | | | poll | + | | +-----------------+ + | | | ... | + | | +-----------------+ + | | + v v +--> struct sk_prot + struct tcp_sock struct tcp_sock | +--------------------+ + +-------------------+ +-------------------+ | | inet_dgram_connect | + | struct inet_sock | | struct inet_sock | | +--------------------+ + | +---------------+ | | +---------------+ | | | inet_sendmsg | + | | struct sock | | | | struct sock | | | +--------------------+ + | | +-----------+ | | | | +-----------+ | | | | udp_poll | + | | | ... | | | | | | ... | | | | +--------------------+ + | | +-----------+ | | | | +-----------+ | | | | inet_release | + | +---------------+ | | +---------------+ | | +--------------------+ + | | sk_prot * | | | | sk_prot * | |--+ | inet_bind | + | +---------------+ | | +---------------+ | +--------------------+ + +-------------------+ +-------------------+ | ... | + | ... | | ... | +--------------------+ + +-------------------+ +-------------------+ + + +Example: UDP send +----------------- + +.. slide:: Example: UDP send + :inline-contents: True + :level: 2 + + + .. code-block:: c + + char c; + struct sockaddr_in addr; + int s; + + s = socket(AF_INET, SOCK_DGRAM, 0); + connect(s, (struct sockaddr*)&addr, sizeof(addr)); + write(s, &c, 1); + close(s); + + +.. slide:: Example: UDP send + :inline-contents: True + :level: 2 + + .. 
ditaa:: + + -:------------------------------------------------------------------------------------ + + VFS layer sys_write → vfs_write → do_sync_write → filp->f_op->aio_write + + -:------------------------------------------------------------------------------------ + + Generic socket layer sock_aio_write → sock->ops->sendmsg + + -:------------------------------------------------------------------------------------ + + IP socket layer sk->sk_prot->sendmsg + + -:------------------------------------------------------------------------------------ + + UDP socket layer ip_append_data udp_flush_pending_frames + | | + -:------------------------------+------------------------------+----------------------- + V V + IP socket layer skb = sock_alloc_send_skb(); ip_local_out + skb_queue_tail(sk, skb) + + -:------------------------------------------------------------------------------------ + + routing + + +Network processing phases +========================= + +.. slide:: Network processing phases + :inline-contents: True + :level: 2 + + * Interrupt handler - device driver fetches data from the RX ring, + creates a network packet and queues it to the network stack for + processing + + * NET_SOFTIRQ - packet goes through the stack layer and it is + processed: decapsulate Ethernet frame, check IP packet and route + it, if local packet decapsulate protocol packet (e.g. TCP) and + queues it to a socket + + * Process context - application fetches data from the socket queue + or pushes data to the socket queue + + +Packet Routing +============== + +.. slide:: Packet Routing + :inline-contents: True + :level: 2 + + .. 
ditaa:: + + +----------------------+ +----------------------+ + | Application | | Application | + +----------------------+ +----------------------+ + | ^ | ^ + | send() | recv() | send() | recv() + V | V | + +----------------------+ +----------------------+ + | Socket | | Socket | + +----------------------+ +----------------------+ + | ^ | ^ + | | | | + v | v | + +---------------------------------------------------------+ + | Transport layer | + +---------------------------------------------------------+ + | ^ | ^ + | | | | + v | v | + +---------------------------------------------------------+ + | Network layer | + +---------------------------------------------------------+ + | ^ + | | + v | + /---------------------------------------------------------\ + | Routing | ----> Drop packet + \---------------------------------------------------------/ + ^ | ^ | + | RX | TX | RX | TX + | v | v + +-----------------------+ +-----------------------+ + | Network Device Driver | | Network Device Driver | + +-----------------------+ +-----------------------+ + + +Routing Table(s) +---------------- + +.. slide:: Routing Table + :inline-contents: True + :level: 2 + + + .. 
code-block:: shell + + tavi@desktop-tavi:~/src/linux$ ip route list table main + default via 172.30.240.1 dev eth0 + 172.30.240.0/20 dev eth0 proto kernel scope link src 172.30.249.241 + + tavi@desktop-tavi:~/src/linux$ ip route list table local + broadcast 127.0.0.0 dev lo proto kernel scope link src 127.0.0.1 + local 127.0.0.0/8 dev lo proto kernel scope host src 127.0.0.1 + local 127.0.0.1 dev lo proto kernel scope host src 127.0.0.1 + broadcast 127.255.255.255 dev lo proto kernel scope link src 127.0.0.1 + broadcast 172.30.240.0 dev eth0 proto kernel scope link src 172.30.249.241 + local 172.30.249.241 dev eth0 proto kernel scope host src 172.30.249.241 + broadcast 172.30.255.255 dev eth0 proto kernel scope link src 172.30.249.241 + + tavi@desktop-tavi:~/src/linux$ ip rule list + 0: from all lookup local + 32766: from all lookup main + 32767: from all lookup default + + +Routing Policy Database +----------------------- + +.. slide:: Routing Policy Database + :inline-contents: True + :level: 2 + + * "Regular" routing only uses the destination address + + * To increase flexibility a "Routing Policy Database" is used that + allows different routing based on other fields such as the source + address, protocol type, transport ports, etc. + + * This is encoded as a list of rules that are evaluated based on + their priority (priority 0 is the highest) + + * Each rule has a selector (how to match the packet) and an + action (what action to take if the packet matches) + + * Selectors: source address, destination address, type of service (TOS), + input interface, output interface, etc. + + * Action: lookup / unicast - use given routing table, blackhole - + drop packet, unreachable - send ICMP unreachable message and drop + packet, etc. + + + +Routing table processing +------------------------ + +.. 
slide:: Routing table processing + :inline-contents: True + :level: 2 + + * Special table for local addresses -> route packets to sockets + based on family, type, ports + + * Check every routing entry starting with the most specific + routes (e.g. 192.168.0.0/24 is checked before 192.168.0.0/16) + + * A route matches if the packet destination address logically ANDed + with the subnet mask equals the subnet address + + * Once a route matches the following information is retrieved: + interface, link layer next-hop address, network next-hop address + + +Forwarding Information Database +------------------------------- + +.. slide:: Forwarding Information Database (removed in 3.6) + :inline-contents: True + :level: 2 + + |_| + + .. image:: ../res/fidb-overview.png + + +.. slide:: Forwarding Information Database (removed in 3.6) + :inline-contents: True + :level: 2 + + .. image:: ../res/fidb-details.png + +.. slide:: Routing Cache (removed in 3.6) + :inline-contents: True + :level: 2 + + |_| + + .. image:: ../res/routing-cache.png + +.. slide:: FIB TRIE + :inline-contents: True + :level: 2 + + |_| + + .. image:: ../res/fib-trie.png + +.. slide:: Compressed Trie + :inline-contents: True + :level: 2 + + |_| + + .. image:: ../res/fib-trie-compressed.png + + +Netfilter +========= + +.. slide:: Netfilter + :inline-contents: True + :level: 2 + + + * Framework that implements packet filtering and NAT + + * It uses hooks inserted in key places in the packet flow: + + * NF_IP_PRE_ROUTING + + * NF_IP_LOCAL_IN + + * NF_IP_FORWARD + + * NF_IP_LOCAL_OUT + + * NF_IP_POST_ROUTING + + * NF_IP_NUMHOOKS + + + +Network packets / skbs (struct sk_buff) +======================================= + +.. slide:: Network packets (skbs) + :inline-contents: True + :level: 2 + + .. image:: ../res/skb.png + + +.. slide:: struct sk_buff + :inline-contents: True + :level: 2 + + ..
code-block:: c + + struct sk_buff { + struct sk_buff *next; + struct sk_buff *prev; + + struct sock *sk; + ktime_t tstamp; + struct net_device *dev; + char cb[48]; + + unsigned int len, + data_len; + __u16 mac_len, + hdr_len; + + void (*destructor)(struct sk_buff *skb); + + sk_buff_data_t transport_header; + sk_buff_data_t network_header; + sk_buff_data_t mac_header; + sk_buff_data_t tail; + sk_buff_data_t end; + + unsigned char *head, + *data; + unsigned int truesize; + atomic_t users; + + +.. slide:: skb APIs + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* reserve head room */ + void skb_reserve(struct sk_buff *skb, int len); + + /* add data to the end */ + unsigned char *skb_put(struct sk_buff *skb, unsigned int len); + + /* add data to the top */ + unsigned char *skb_push(struct sk_buff *skb, unsigned int len); + + /* discard data at the top */ + unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); + + /* discard data at the end */ + unsigned char *skb_trim(struct sk_buff *skb, unsigned int len); + + unsigned char *skb_transport_header(const struct sk_buff *skb); + + void skb_reset_transport_header(struct sk_buff *skb); + + void skb_set_transport_header(struct sk_buff *skb, const int offset); + + unsigned char *skb_network_header(const struct sk_buff *skb); + + void skb_reset_network_header(struct sk_buff *skb); + + void skb_set_network_header(struct sk_buff *skb, const int offset); + + unsigned char *skb_mac_header(const struct sk_buff *skb); + + int skb_mac_header_was_set(const struct sk_buff *skb); + + void skb_reset_mac_header(struct sk_buff *skb); + + void skb_set_mac_header(struct sk_buff *skb, const int offset); + + +.. slide:: skb data management + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + :height: 50% + + Head + ^ +---------------+ + skb_push | | | | skb_reserve + +---------------+ v + | Data | | skb_pull + ^ | | v + skb_trim | | Tail | + +---------------+ + | | | skb_put + +---------------+ v + End + + +Network Device +============== + +.. slide:: Network Device Interface + :inline-contents: True + :level: 2 + + .. image:: ../res/net-dev-hw.png + + +.. slide:: Advanced features + :inline-contents: True + :level: 2 + + * Scatter-Gather + + * Checksum offloading: Ethernet, IP, UDP, TCP + + * Adaptive interrupt handling (coalescence, adaptive) + + + +Hardware and Software Acceleration Techniques +============================================= + +.. slide:: TCP offload + :inline-contents: True + :level: 2 + + * Full offload - Implement TCP/IP stack in hardware + + * Issues: + + * Scaling number of connections + + * Security + + * Conformance + +.. slide:: Performance observation + :inline-contents: True + :level: 2 + + * Performance is proportional with the number of packets to be + processed + + * Example: if an end-point can process 60K pps + + * 1538 MSS -> 738Mbps + * 2038 MSS -> 978Mbps + * 9038 MSS -> 4.3Gbps + * 20738 MSS -> 9.9Gbps + +.. slide:: Stateless offload + :inline-contents: True + :level: 2 + + * The networking stack processes large packets + + * TX path: the hardware splits large packets in smaller packets + (TCP Segmentation Offload) + + * RX path: the hardware aggregates small packets into larger + packets (Large Receive Offload - LRO) + + +.. slide:: TCP Segmentation Offload + :inline-contents: True + :level: 2 + + .. image:: ../res/tso.png + +.. slide:: Large Receive Offload + :inline-contents: True + :level: 2 + + .. 
image:: ../res/lro.png + + + diff --git a/Documentation/teaching/lectures/processes.rst b/Documentation/teaching/lectures/processes.rst new file mode 100644 index 00000000000000..95c8e2f759f89b --- /dev/null +++ b/Documentation/teaching/lectures/processes.rst @@ -0,0 +1,1312 @@ +========= +Processes +========= + +`View slides <processes-slides.html>`_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives +================== + +.. slide:: Processes and threads + :inline-contents: True + :level: 2 + + * Processes and threads + + * Context switching + + * Blocking and waking up + + * Process context + + + +Processes and threads +===================== + +A process is an operating system abstraction that groups together +multiple resources: + +.. slide:: What is a process? + :inline-contents: True + :level: 2 + + .. hlist:: + :columns: 2 + + * An address space + * One or more threads + * Opened files + * Sockets + * Semaphores + * Shared memory regions + * Timers + * Signal handlers + * Many other resources and status information + + All this information is grouped in the Process Control Block + (PCB). In Linux this is :c:type:`struct task_struct`. + + +Overview of process resources +----------------------------- + +A summary of the resources a process has can be obtained from the +`/proc/<pid>` directory, where `<pid>` is the process id for the +process we want to look at. + +.. slide:: Overview of process resources + :inline-contents: True + :level: 2 + + .. fixme: ditaa does not work well with text containing ':' or '-' characters + .. code-block:: none + + +-------------------------------------------------------------------+ + | dr-x------ 2 tavi tavi 0 2021 03 14 12:34 . | + | dr-xr-xr-x 6 tavi tavi 0 2021 03 14 12:34 ..
| + | lrwx------ 1 tavi tavi 64 2021 03 14 12:34 0 -> /dev/pts/4 | + +--->| lrwx------ 1 tavi tavi 64 2021 03 14 12:34 1 -> /dev/pts/4 | + | | lrwx------ 1 tavi tavi 64 2021 03 14 12:34 2 -> /dev/pts/4 | + | | lr-x------ 1 tavi tavi 64 2021 03 14 12:34 3 -> /proc/18312/fd | + | +-------------------------------------------------------------------+ + | +----------------------------------------------------------------+ + | | 08048000-0804c000 r-xp 00000000 08:02 16875609 /bin/cat | + $ ls -1 /proc/self/ | 0804c000-0804d000 rw-p 00003000 08:02 16875609 /bin/cat | + cmdline | | 0804d000-0806e000 rw-p 0804d000 00:00 0 [heap] | + cwd | | ... | + environ | +----------->| b7f46000-b7f49000 rw-p b7f46000 00:00 0 | + exe | | | b7f59000-b7f5b000 rw-p b7f59000 00:00 0 | + fd --------+ | | b7f5b000-b7f77000 r-xp 00000000 08:02 11601524 /lib/ld-2.7.so | + fdinfo | | b7f77000-b7f79000 rw-p 0001b000 08:02 11601524 /lib/ld-2.7.so | + maps -----------+ | bfa05000-bfa1a000 rw-p bffeb000 00:00 0 [stack] | + mem | ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso] | + root +----------------------------------------------------------------+ + stat +----------------------------+ + statm | Name: cat | + status ------+ | State: R (running) | + task | | Tgid: 18205 | + wchan +------>| Pid: 18205 | + | PPid: 18133 | + | Uid: 1000 1000 1000 1000 | + | Gid: 1000 1000 1000 1000 | + +----------------------------+ + + +:c:type:`struct task_struct` +---------------------------- + +Lets take a close look at :c:type:`struct task_struct`. For that we +could just look at the source code, but here we will use a tool called +`pahole` (part of the dwarves install package) in order to get +some insights about this structure: + + +.. slide:: struct task_struct + :inline-contents: True + :level: 2 + + .. code-block:: c + + $ pahole -C task_struct vmlinux + + struct task_struct { + struct thread_info thread_info; /* 0 8 */ + volatile long int state; /* 8 4 */ + void * stack; /* 12 4 */ + + ... 
+ + /* --- cacheline 45 boundary (2880 bytes) --- */ + struct thread_struct thread __attribute__((__aligned__(64))); /* 2880 4288 */ + + /* size: 7168, cachelines: 112, members: 155 */ + /* sum members: 7148, holes: 2, sum holes: 12 */ + /* sum bitfield members: 7 bits, bit holes: 2, sum bit holes: 57 bits */ + /* paddings: 1, sum paddings: 2 */ + /* forced alignments: 6, forced holes: 2, sum forced holes: 12 */ + } __attribute__((__aligned__(64))); + + +As you can see it is a pretty large data structure: almost 8KB in size +and 155 fields. + + +Inspecting task_struct +---------------------- + +The following screencast is going to demonstrate how we can inspect +the process control block (:c:type:`struct task_struct`) by connecting +the debugger to the running virtual machine. We are going to use a +helper gdb command `lx-ps` to list the processes and the address of +the task_struct for each process. + +.. slide:: Inspecting task_struct + :inline-contents: True + :level: 2 + + |_| + + .. asciicast:: ../res/inspect_task_struct.cast + + +Quiz: Inspect a task to determine opened files +---------------------------------------------- + +.. slide:: Quiz: Inspect opened files + :inline-contents: True + :level: 2 + + Use the debugger to inspect the process named syslogd. + + * What command should we use to list the opened file descriptors? + + * How many file descriptors are opened? + + * What command should we use to determine the file name for opened file descriptor 3? + + * What is the filename for file descriptor 3? + + +Threads +------- + +A thread is the basic unit that the kernel process scheduler uses to +allow applications to run on the CPU. A thread has the following +characteristics: + +..
slide:: Threads + :inline-contents: True + :level: 2 + + * Each thread has its own stack and together with the register + values it determines the thread execution state + + * A thread runs in the context of a process and all threads in the + same process share the resources + + * The kernel schedules threads not processes and user-level threads + (e.g. fibers, coroutines, etc.) are not visible at the kernel level + + +The typical thread implementation is one where the threads is +implemented as a separate data structure which is then linked to the +process data structure. For example, the Windows kernel uses such an +implementation: + + +.. slide:: Classic implementation (Windows) + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + EPROCESS + +------------------+ + +->| KPROCESS | + | +------------------+ + | | Process ID (PID) | + | +------------------+ + | | ... | + | +------------------+ + | | Thread list |--------------+------------------------------------+ + | +------------------+ | | + | | Opened files | ETHREAD V ETHREAD V + | | +--------------+ | + | | | FILE | | + | | +--------------+ | + | | | ... | | + | | +--------------+ | + | +------------------+ +-----------------------+ +-----------------------+ + | | Address Space | | KTHREAD | | KTHREAD | + | + +--------------+ | +-----------------------+ +-----------------------+ + | | | ... | | | Thread ID (TID) | | Thread ID (TID) | + | | +--------------+ | +-----------------------+ +-----------------------+ + | +------------------+ | Thread Start Address | | Thread Start Address | + | +-----------------------+ +-----------------------+ + | | ... | ... | ... | + | +-----------------------+ +-----------------------+ + | | Process | | Process | + | +-----------------------+ +-----------------------+ + | | | + +---------------------------------------+------------------------------------+ + + +Linux uses a different implementation for threads. 
The basic unit is +called a task (hence the :c:type:`struct task_struct`) and it is used +for both threads and processes. Instead of embedding resources in the +task structure it has pointers to these resources. + +Thus, if two threads are in the same process they will point to the same +resource structure instance. If two threads are in different processes +they will point to different resource structure instances. + + +.. slide:: Linux implementation + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + Opened files + task_struct +-------------------+ task_struct + +-----------------------+ | FILE | +-----------------------+ + | Thread Group ID (PID) | +--->+-------------------+<---+ | Thread Group ID (PID) | + +-----------------------+ | | .... | | +-----------------------+ + | Thread ID (TID) | | +-------------------+ | | Thread ID (TID) | + +-----------------------+ | | +-----------------------+ + | ... | | | | ... | + +-----------------------+ | | +-----------------------+ + | Opened files |--+ +--| Opened files | + +-----------------------+ Address Space +-----------------------+ + | Address Space |---+ +-------------------+ +---| Address Space | + +-----------------------+ | | | | +-----------------------+ + | ... | +-->| .... |<--+ | ... | + +-----------------------+ | | +-----------------------+ + +-------------------+ + + +The clone system call +--------------------- + +In Linux a new thread or process is created with the :c:func:`clone` +system call. Both the :c:func:`fork` system call and the +:c:func:`pthread_create` function use the :c:func:`clone` +implementation. + +It allows the caller to decide what resources should be shared with +the parent and which should be copied or isolated: + +..
slide:: The clone system call + :inline-contents: True + :level: 2 + + * CLONE_FILES - shares the file descriptor table with the parent + + * CLONE_VM - shares the address space with the parent + + * CLONE_FS - shares the filesystem information (root directory, + current directory) with the parent + + * CLONE_NEWNS - does not share the mount namespace with the parent + + * CLONE_NEWIPC - does not share the IPC namespace (System V IPC + objects, POSIX message queues) with the parent + + * CLONE_NEWNET - does not share the networking namespace (network + interfaces, routing table) with the parent + + +For example, if `CLONE_FILES | CLONE_VM | CLONE_FS` is used by the +caller then effectively a new thread is created. If these flags are +not used then a new process is created. + +Namespaces and "containers" +--------------------------- + +"Containers" are a form of lightweight virtual machines that share the +same kernel instance, as opposed to normal virtualization where a +hypervisor runs multiple VMs, each with its own kernel +instance. + +Examples of container technologies are LXC - that allows running +lightweight "VMs" and docker - a specialized container for running a +single application. + +Containers are built on top of a few kernel features, one of which is +namespaces. They allow isolation of different resources that would +otherwise be globally visible. For example, without containers, all +processes would be visible in /proc. With containers, processes in one +container will not be visible (in /proc or be killable) to other +containers. + +To achieve this partitioning, the :c:type:`struct nsproxy` structure +is used to group types of resources that we want to partition. It +currently supports IPC, networking, cgroup, mount, UTS, PID, +time namespaces. For example, instead of having a global list for +networking interfaces, the list is part of a :c:type:`struct net`.
The +system initializes with a default namespace (:c:data:`init_net`) and by +default all processes will share this namespace. When a new namespace +is created a new net namespace is created and then new processes can +point to that new namespace instead of the default one. + + +.. slide:: Namespaces and "containers" + :inline-contents: False + :level: 2 + + * Containers = a form of lightweight virtual machines + + * Container based technologies: LXC, docker + + * Containers are built on top of kernel namespaces + + * Kernel namespaces allow isolation of otherwise globally visible + resources + + * :c:type:`struct nsproxy` has multiple namespaces each of which + can be selectively shared between groups of processes + + * At boot initial namespaces are created (e.g. :c:data:`init_net`) + that are by default shared between new processes (e.g. list of + available network interfaces) + + * New namespaces can be created at runtime and new processes can + point to these new namespaces + + +Accessing the current process +----------------------------- + +.. slide:: Accessing the current process + :inline-contents: True + :level: 2 + + Accessing the current process is a frequent operation: + + * opening a file needs access to :c:type:`struct task_struct`'s + files field + + * mapping a new file needs access to :c:type:`struct task_struct`'s + mm field + + * Over 90% of the system calls need to access the current process + structure so it needs to be fast + + * The :c:macro:`current` macro is available to access the current + process's :c:type:`struct task_struct` + +In order to support fast access in multi processor configurations a +per CPU variable is used to store and retrieve the pointer to the +current :c:type:`struct task_struct`: + +.. slide:: Accessing the current process on x86 + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + CPU0 + +------------+ task_struct + | ...
| +--------> +-----------------------+ + +------------- | | Thread Group ID (PID) | + +--| FS | | +-----------------------+ + | +------------- | | Thread ID (TID) | + | | ... | | +-----------------------+ + | +------------+ | | ... | + | | +-----------------------+ + | Per CPU variables | | Opened files | + +->+-----------------------+ | +-----------------------+ + | ... | | | Address Space | + +-----------------------+ | +-----------------------+ + | current_task |------+ | ... | + +-----------------------+ +-----------------------+ + | ... | + +-----------------------+ + + +Previously the following sequence was used as the implementation for +the :c:macro:`current` macro: + +.. slide:: Previous implementation for current (x86) + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* how to get the current stack pointer from C */ + register unsigned long current_stack_pointer asm("esp") __attribute_used__; + + /* how to get the thread information struct from C */ + static inline struct thread_info *current_thread_info(void) + { + return (struct thread_info *)(current_stack_pointer & ~(THREAD_SIZE - 1)); + } + + #define current current_thread_info()->task + + +Quiz: previous implementation for current (x86) +----------------------------------------------- + +.. slide:: Quiz: previous implementation for current (x86) + :inline-contents: True + :level: 2 + + What is the size of :c:type:`struct thread_info`? + + Which of the following are potential valid sizes for + :c:type:`struct thread_info`: 4095, 4096, 4097? + + + +Context switching +================= + +The following diagram shows an overview of the Linux kernel context +switch process: + +.. slide:: Overview of the context switching process + :inline-contents: True + :level: 2 + + ..
ditaa:: + + Userspace Kernel Kernel Userspace + T0 T0 T1 T1 + + | + | syscall +-------------------+ + V --------->| Save user regs on | +-----------------+ + interrupt | the kernel stack | | Save user regs | + +-------------------+ | on kernel stack | + | +-----------------+ + |schedule() | + | |schedule() + V | + +-----------------+ V + | context_switch |------+ +-----------------+ + +-----------------+ | | context_switch | + +-----> +-----------------+ + | + V + +-------------------+ + | Pop user regs | + | from kernel stack | + +-------------------+ + | + | exit syscall + +--------------------> | + | + V + + +Note that before a context switch can occur we must do a kernel +transition, either with a system call or with an interrupt. At that +point the user space registers are saved on the kernel stack. At some +point the :c:func:`schedule` function will be called which can decide +that a context switch must occur from T0 to T1 (e.g. because the +current thread is blocking waiting for an I/O operation to complete or +because it's allocated time slice has expired). + +At that point :c:func:`context_switch` will perform architecture +specific operations and will switch the address space if needed: + + +.. slide:: context_switch + :inline-contents: True + :level: 2 + + .. code-block:: c + + static __always_inline struct rq * + context_switch(struct rq *rq, struct task_struct *prev, + struct task_struct *next, struct rq_flags *rf) + { + prepare_task_switch(rq, prev, next); + + /* + * For paravirt, this is coupled with an exit in switch_to to + * combine the page table reload and the switch backend into + * one hypercall. 
+ */ + arch_start_context_switch(prev); + + /* + * kernel -> kernel lazy + transfer active + * user -> kernel lazy + mmgrab() active + * + * kernel -> user switch + mmdrop() active + * user -> user switch + */ + if (!next->mm) { // to kernel + enter_lazy_tlb(prev->active_mm, next); + + next->active_mm = prev->active_mm; + if (prev->mm) // from user + mmgrab(prev->active_mm); + else + prev->active_mm = NULL; + } else { // to user + membarrier_switch_mm(rq, prev->active_mm, next->mm); + /* + * sys_membarrier() requires an smp_mb() between setting + * rq->curr / membarrier_switch_mm() and returning to userspace. + * + * The below provides this either through switch_mm(), or in + * case 'prev->active_mm == next->mm' through + * finish_task_switch()'s mmdrop(). + */ + switch_mm_irqs_off(prev->active_mm, next->mm, next); + + if (!prev->mm) { // from kernel + /* will mmdrop() in finish_task_switch(). */ + rq->prev_mm = prev->active_mm; + prev->active_mm = NULL; + } + } + + rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); + + prepare_lock_switch(rq, next, rf); + + /* Here we just switch the register state and the stack. */ + switch_to(prev, next, prev); + barrier(); + + return finish_task_switch(prev); + } + + +Then it will call the architecture specific :c:macro:`switch_to` +implementation to switch the registers state and kernel stack. Note +that registers are saved on stack and that the stack pointer is saved +in the task structure: + +.. slide:: switch_to + :inline-contents: True + :level: 2 + + .. 
code-block:: c + :emphasize-lines: 28-30,56 + + #define switch_to(prev, next, last) \ + do { \ + ((last) = __switch_to_asm((prev), (next))); \ + } while (0) + + + /* + * %eax: prev task + * %edx: next task + */ + .pushsection .text, "ax" + SYM_CODE_START(__switch_to_asm) + /* + * Save callee-saved registers + * This must match the order in struct inactive_task_frame + */ + pushl %ebp + pushl %ebx + pushl %edi + pushl %esi + /* + * Flags are saved to prevent AC leakage. This could go + * away if objtool would have 32bit support to verify + * the STAC/CLAC correctness. + */ + pushfl + + /* switch stack */ + movl %esp, TASK_threadsp(%eax) + movl TASK_threadsp(%edx), %esp + + #ifdef CONFIG_STACKPROTECTOR + movl TASK_stack_canary(%edx), %ebx + movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset + #endif + + #ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + #endif + + /* Restore flags or the incoming task to restore AC state. */ + popfl + /* restore callee-saved registers */ + popl %esi + popl %edi + popl %ebx + popl %ebp + + jmp __switch_to + SYM_CODE_END(__switch_to_asm) + .popsection + + +You can notice that the instruction pointer is not explicitly +saved. 
It is not needed because: + + * a task will always resume in this function + + * the :c:func:`schedule` (:c:func:`context_switch` is always + inlined) caller's return address is saved on the kernel stack + + * a jmp is used to execute :c:func:`__switch_to` which is a function + and when it returns it will pop the original (next task) return + address from the stack + + +The following screencast uses the debugger to set up a breakpoint in +__switch_to_asm and examine the stack during the context switch: + +.. slide:: Inspecting task_struct + :inline-contents: True + :level: 2 + + |_| + + .. asciicast:: ../res/context_switch.cast + + +Quiz: context switch +-------------------- + +.. slide:: Quiz: context switch + :inline-contents: True + :level: 2 + + We are executing a context switch. Select all of the statements that are true. + + * the ESP register is saved in the task structure + + * the EIP register is saved in the task structure + + * general registers are saved in the task structure + + * the ESP register is saved on the stack + + * the EIP register is saved on the stack + + * general registers are saved on the stack + + +Blocking and waking up tasks +============================ + +Task states +----------- + +The following diagram shows the task (thread) states and the +possible transitions between them: + +.. slide:: Task states + :inline-contents: True + :level: 2 + + .. 
ditaa:: + + preemption + +------------------------------+ + | | + V | + +------------+ +--------------+ +-------------+ + clone() | | schedule() | | exit() | | + -----------> | TASK_READY |-------------->| TASK_RUNNING |---------------->| TASK_DEAD | + | | | |--------+ | TASK_ZOMBIE | + +------------+ +--------------+ | | | + ^ | +-------------+ + | | + | | + | | + | signal +----------------------+ | + +-----------| | | + | | | wait_event() | + | wake_up() | TASK_INTERRUPTIBLE |<--------------+ + +-----------| | | + | | | | + | +----------------------+ | + | | + | | + | +----------------------+ | + | | | wait_event() | + | wake_up() | TASK_UNINTERRUPTIBLE |<--------------+ + +-----------| | + +----------------------+ + + +Blocking the current thread +--------------------------- + +Blocking the current thread is an important operation we need to +perform to implement efficient task scheduling - we want to run other +threads while I/O operations complete. + +In order to accomplish this the following operations take place: + +.. slide:: Blocking the current thread + :inline-contents: True + :level: 2 + + * Set the current thread state to TASK_UINTERRUPTIBLE or + TASK_INTERRUPTIBLE + + * Add the task to a waiting queue + + * Call the scheduler which will pick up a new task from the READY + queue + + * Do the context switch to the new task + +Below are some snippets for the :c:macro:`wait_event` +implementation. Note that the waiting queue is a list with some extra +information like a pointer to the task struct. + +Also note that a lot of effort is put into making sure no deadlock can +occur between :c:macro:`wait_event` and :c:macro:`wake_up`: the task +is added to the list before checking :c:data:`condition`, signals are +checked before calling :c:func:`schedule`. + +.. slide:: wait_event + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + /** + * wait_event - sleep until a condition gets true + * @wq_head: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq_head is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + */ + #define wait_event(wq_head, condition) \ + do { \ + might_sleep(); \ + if (condition) \ + break; \ + __wait_event(wq_head, condition); \ + } while (0) + + #define __wait_event(wq_head, condition) \ + (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + schedule()) + + /* + * The below macro ___wait_event() has an explicit shadow of the __ret + * variable when used from the wait_event_*() macros. + * + * This is so that both can use the ___wait_cond_timeout() construct + * to wrap the condition. + * + * The type inconsistency of the wait_event_*() __ret variable is also + * on purpose; we use long where we can return timeout values and int + * otherwise. + */ + #define ___wait_event(wq_head, condition, state, exclusive, ret, cmd) \ + ({ \ + __label__ __out; \ + struct wait_queue_entry __wq_entry; \ + long __ret = ret; /* explicit shadow */ \ + \ + init_wait_entry(&__wq_entry, exclusive ? 
WQ_FLAG_EXCLUSIVE : 0); \ + for (;;) { \ + long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\ + \ + if (condition) \ + break; \ + \ + if (___wait_is_interruptible(state) && __int) { \ + __ret = __int; \ + goto __out; \ + } \ + \ + cmd; \ + } \ + finish_wait(&wq_head, &__wq_entry); \ + __out: __ret; \ + }) + + void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) + { + wq_entry->flags = flags; + wq_entry->private = current; + wq_entry->func = autoremove_wake_function; + INIT_LIST_HEAD(&wq_entry->entry); + } + + long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) + { + unsigned long flags; + long ret = 0; + + spin_lock_irqsave(&wq_head->lock, flags); + if (signal_pending_state(state, current)) { + /* + * Exclusive waiter must not fail if it was selected by wakeup, + * it should "consume" the condition we were waiting for. + * + * The caller will recheck the condition and return success if + * we were already woken up, we can not miss the event because + * wakeup locks/unlocks the same wq_head->lock. + * + * But we need to ensure that set-condition + wakeup after that + * can't see us, it should wake up another exclusive waiter if + * we fail. 
+ */ + list_del_init(&wq_entry->entry); + ret = -ERESTARTSYS; + } else { + if (list_empty(&wq_entry->entry)) { + if (wq_entry->flags & WQ_FLAG_EXCLUSIVE) + __add_wait_queue_entry_tail(wq_head, wq_entry); + else + __add_wait_queue(wq_head, wq_entry); + } + set_current_state(state); + } + spin_unlock_irqrestore(&wq_head->lock, flags); + + return ret; + } + + static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) + { + list_add(&wq_entry->entry, &wq_head->head); + } + + static inline void __add_wait_queue_entry_tail(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) + { + list_add_tail(&wq_entry->entry, &wq_head->head); + } + + /** + * finish_wait - clean up after waiting in a queue + * @wq_head: waitqueue waited on + * @wq_entry: wait descriptor + * + * Sets current thread back to running state and removes + * the wait descriptor from the given waitqueue if still + * queued. + */ + void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) + { + unsigned long flags; + + __set_current_state(TASK_RUNNING); + /* + * We can check for list emptiness outside the lock + * IFF: + * - we use the "careful" check that verifies both + * the next and prev pointers, so that there cannot + * be any half-pending updates in progress on other + * CPU's that we haven't seen yet (and that might + * still change the stack area. + * and + * - all other users take the lock (ie we can only + * have _one_ other CPU that looks at or modifies + * the list). + */ + if (!list_empty_careful(&wq_entry->entry)) { + spin_lock_irqsave(&wq_head->lock, flags); + list_del_init(&wq_entry->entry); + spin_unlock_irqrestore(&wq_head->lock, flags); + } + } + + + +Waking up a task +---------------- + +We can wake-up tasks by using the :c:macro:`wake_up` primitive. The +following high level operations are performed to wake up a task: + +.. 
slide:: Waking up a task + :inline-contents: True + :level: 2 + + * Select a task from the waiting queue + + * Set the task state to TASK_READY + + * Insert the task into the scheduler's READY queue + + * On SMP system this is a complex operation: each processor has its + own queue, queues need to be balanced, CPUs needs to be signaled + + +.. slide:: wake_up + :inline-contents: True + :level: 2 + + .. code-block:: c + + #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) + + /** + * __wake_up - wake up threads blocked on a waitqueue. + * @wq_head: the waitqueue + * @mode: which threads + * @nr_exclusive: how many wake-one or wake-many threads to wake up + * @key: is directly passed to the wakeup function + * + * If this function wakes up a task, it executes a full memory barrier before + * accessing the task state. + */ + void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, + int nr_exclusive, void *key) + { + __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key); + } + + static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode, + int nr_exclusive, int wake_flags, void *key) + { + unsigned long flags; + wait_queue_entry_t bookmark; + + bookmark.flags = 0; + bookmark.private = NULL; + bookmark.func = NULL; + INIT_LIST_HEAD(&bookmark.entry); + + do { + spin_lock_irqsave(&wq_head->lock, flags); + nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, + wake_flags, key, &bookmark); + spin_unlock_irqrestore(&wq_head->lock, flags); + } while (bookmark.flags & WQ_FLAG_BOOKMARK); + } + + /* + * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just + * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve + * number) then we wake all the non-exclusive tasks and one exclusive task. + * + * There are circumstances in which we can try to wake a task which has already + * started to run but is not in state TASK_RUNNING. 
try_to_wake_up() returns + * zero in this (rare) case, and we handle it by continuing to scan the queue. + */ + static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, + int nr_exclusive, int wake_flags, void *key, + wait_queue_entry_t *bookmark) + { + wait_queue_entry_t *curr, *next; + int cnt = 0; + + lockdep_assert_held(&wq_head->lock); + + if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) { + curr = list_next_entry(bookmark, entry); + + list_del(&bookmark->entry); + bookmark->flags = 0; + } else + curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry); + + if (&curr->entry == &wq_head->head) + return nr_exclusive; + + list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) { + unsigned flags = curr->flags; + int ret; + + if (flags & WQ_FLAG_BOOKMARK) + continue; + + ret = curr->func(curr, mode, wake_flags, key); + if (ret < 0) + break; + if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) + break; + + if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) && + (&next->entry != &wq_head->head)) { + bookmark->flags = WQ_FLAG_BOOKMARK; + list_add_tail(&bookmark->entry, &next->entry); + break; + } + } + + return nr_exclusive; + } + + int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key) + { + int ret = default_wake_function(wq_entry, mode, sync, key); + + if (ret) + list_del_init_careful(&wq_entry->entry); + + return ret; + } + + int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, + void *key) + { + WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); + return try_to_wake_up(curr->private, mode, wake_flags); + } + + /** + * try_to_wake_up - wake up a thread + * @p: the thread to be awakened + * @state: the mask of task states that can be woken + * @wake_flags: wake modifier flags (WF_*) + * + * Conceptually does: + * + * If (@state & @p->state) @p->state = TASK_RUNNING. 
+ * + * If the task was not queued/runnable, also place it back on a runqueue. + * + * This function is atomic against schedule() which would dequeue the task. + * + * It issues a full memory barrier before accessing @p->state, see the comment + * with set_current_state(). + * + * Uses p->pi_lock to serialize against concurrent wake-ups. + * + * Relies on p->pi_lock stabilizing: + * - p->sched_class + * - p->cpus_ptr + * - p->sched_task_group + * in order to do migration, see its use of select_task_rq()/set_task_cpu(). + * + * Tries really hard to only take one task_rq(p)->lock for performance. + * Takes rq->lock in: + * - ttwu_runnable() -- old rq, unavoidable, see comment there; + * - ttwu_queue() -- new rq, for enqueue of the task; + * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. + * + * As a consequence we race really badly with just about everything. See the + * many memory barriers and their comments for details. + * + * Return: %true if @p->state changes (an actual wakeup was done), + * %false otherwise. + */ + static int + try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) + { + ... + + +Preempting tasks +================ + +Up until this point we looked at how context switches occur voluntarily +between threads. Next we will look at how preemption is handled. We +will start with the simpler case where the kernel is configured as +non preemptive and then we will move to the preemptive kernel case. + +Non preemptive kernel +--------------------- + +.. slide:: Non preemptive kernel + :inline-contents: True + :level: 2 + + * At every tick the kernel checks to see if the current process has + its time slice consumed + + * If that happens a flag is set in interrupt context + + * Before returning to userspace the kernel checks this flag and + calls :c:func:`schedule` if needed + + * In this case tasks are not preempted while running in kernel mode + (e.g. 
system call) so there are no synchronization issues + + +Preemptive kernel +----------------- + +In this case the current task can be preempted even if we are running +in kernel mode and executing a system call. This requires using +special synchronization primitives: :c:macro:`preempt_disable` and +:c:macro:`preempt_enable`. + +In order to simplify handling for preemptive kernels and since +synchronization primitives are needed for the SMP case anyway, +preemption is disabled automatically when a spinlock is used. + +As before, if we run into a condition that requires the preemption of +the current task (its time slice has expired) a flag is set. This +flag is checked whenever the preemption is reactivated, e.g. when +exiting a critical section through a :c:func:`spin_unlock` and if +needed the scheduler is called to select a new task. + + +.. slide:: Preemptive kernel + :inline-contents: False + :level: 2 + + * Tasks can be preempted even when running in kernel mode + + * It requires new synchronization primitives to be used in critical + sections: :c:macro:`preempt_disable` and + :c:macro:`preempt_enable` + + * Spinlocks also disable preemption + + * When a thread needs to be preempted a flag is set and action is + taken (e.g. scheduler is called) when preemption is reactivated + + +Process context +=============== + +Now that we have examined the implementation of processes and threads +(tasks), how context switching occurs, how we can block, wake-up and +preempt tasks, we can finally define what the process context is and what +its properties are: + +.. slide:: Process context + :inline-contents: True + :level: 2 + + The kernel is executing in process context when it is running a + system call. + + In process context there is a well defined context and we can + access the current process data with :c:macro:`current` + + In process context we can sleep (wait on a condition). 
+ + In process context we can access the user-space (unless we are + running in a kernel thread context). + + +Kernel threads +-------------- + +.. slide:: Kernel threads + :inline-contents: True + :level: 2 + + Sometimes the kernel core or device drivers need to perform blocking + operations and thus they need to run in process context. + + Kernel threads are used exactly for this and are a special class of + tasks that don't have "userspace" resources (e.g. no address space or + opened files). + + +The following screencast takes a closer look at kernel threads: + +.. slide:: Inspecting kernel threads + :inline-contents: True + :level: 2 + + |_| + + .. asciicast:: ../res/kernel_threads.cast + + +Using gdb scripts for kernel inspection +======================================= + +The Linux kernel comes with a predefined set of gdb extra commands we +can use to inspect the kernel during debugging. They will +automatically be loaded as long as gdbinit is properly set up: + +.. code-block:: sh + + ubuntu@so2:/linux/tools/labs$ cat ~/.gdbinit + add-auto-load-safe-path /linux/scripts/gdb/vmlinux-gdb.py + +All of the kernel specific commands are prefixed with lx-. You can use +TAB in gdb to list all of them: + +.. code-block:: sh + + (gdb) lx- + lx-clk-summary lx-dmesg lx-mounts + lx-cmdline lx-fdtdump lx-ps + lx-configdump lx-genpd-summary lx-symbols + lx-cpus lx-iomem lx-timerlist + lx-device-list-bus lx-ioports lx-version + lx-device-list-class lx-list-check + lx-device-list-tree lx-lsmod + +The implementation of the commands can be found at +`scripts/gdb/linux`. Let's take a closer look at the lx-ps +implementation: + + +.. 
code-block:: python + + task_type = utils.CachedType("struct task_struct") + + + def task_lists(): + task_ptr_type = task_type.get_type().pointer() + init_task = gdb.parse_and_eval("init_task").address + t = g = init_task + + while True: + while True: + yield t + + t = utils.container_of(t['thread_group']['next'], + task_ptr_type, "thread_group") + if t == g: + break + + t = g = utils.container_of(g['tasks']['next'], + task_ptr_type, "tasks") + if t == init_task: + return + + + class LxPs(gdb.Command): + """Dump Linux tasks.""" + + def __init__(self): + super(LxPs, self).__init__("lx-ps", gdb.COMMAND_DATA) + + def invoke(self, arg, from_tty): + gdb.write("{:>10} {:>12} {:>7}\n".format("TASK", "PID", "COMM")) + for task in task_lists(): + gdb.write("{} {:^5} {}\n".format( + task.format_string().split()[0], + task["pid"].format_string(), + task["comm"].string())) + + + +Quiz: Kernel gdb scripts +------------------------ + +.. slide:: Quiz: Kernel gdb scripts + :inline-contents: True + :level: 2 + + What is the following change of the lx-ps script trying to + accomplish? + + .. 
code-block:: diff + + diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py + index 17ec19e9b5bf..7e43c163832f 100644 + --- a/scripts/gdb/linux/tasks.py + +++ b/scripts/gdb/linux/tasks.py + @@ -75,10 +75,13 @@ class LxPs(gdb.Command): + def invoke(self, arg, from_tty): + gdb.write("{:>10} {:>12} {:>7}\n".format("TASK", "PID", "COMM")) + for task in task_lists(): + - gdb.write("{} {:^5} {}\n".format( + + check = task["mm"].format_string() == "0x0" + + gdb.write("{} {:^5} {}{}{}\n".format( + task.format_string().split()[0], + task["pid"].format_string(), + - task["comm"].string())) + + "[" if check else "", + + task["comm"].string(), + + "]" if check else "")) + + + LxPs() + diff --git a/Documentation/teaching/lectures/smp.rst b/Documentation/teaching/lectures/smp.rst new file mode 100644 index 00000000000000..29706286c2208e --- /dev/null +++ b/Documentation/teaching/lectures/smp.rst @@ -0,0 +1,1184 @@ +========================== +Symmetric Multi-Processing +========================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. slide:: Symmetric Multi-Processing + :inline-contents: True + :level: 2 + + * Kernel Concurrency + + * Atomic operations + + * Spin locks + + * Cache thrashing + + * Optimized spin locks + + * Process and Interrupt Context Synchronization + + * Mutexes + + * Per CPU data + + * Memory Ordering and Barriers + + * Read-Copy Update + + +Synchronization basics +====================== + +Because the Linux kernel supports symmetric multi-processing (SMP) it +must use a set of synchronization mechanisms to achieve predictable +results, free of race conditions. + +.. note:: We will use the terms core, CPU and processor as + interchangeable for the purpose of this lecture. + +Race conditions can occur when the following two conditions happen +simultaneously: + +.. 
slide:: Race conditions + :inline-contents: True + :level: 2 + + * there are at least two execution contexts that run in "parallel": + + * truly run in parallel (e.g. two system calls running on + different processors) + + * one of the contexts can arbitrary preempt the other (e.g. an + interrupt preempts a system call) + + * the execution contexts perform read-write accesses to shared + memory + + +Race conditions can lead to erroneous results that are hard to debug, +because they manifest only when the execution contexts are scheduled +on the CPU cores in a very specific order. + +A classical race condition example is an incorrect implementation for +a release operation of a resource counter: + +.. slide:: Race condition: resource counter release + :inline-contents: True + :level: 2 + + .. code-block:: c + + void release_resource() + { + counter--; + + if (!counter) + free_resource(); + } + + +A resource counter is used to keep a shared resource available until +the last user releases it but the above implementation has a race +condition that can cause freeing the resource twice: + + +.. slide:: Race condition scenario + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + counter is 2 + + Thread A Thread B + + * + | + | + +---------------------+ + | dec counter | counter is 1 + | cEEE | + +---------------------+ + | + | B preempts A + +-----------------------------------------------+ + | + v + +----------------------+ + counter is 0 | dec counter | + | if (!counter) | + resource is freed | free_resource(); | + | cEEE | + +----------------------+ + B finishes, A continues | + +-----------------------------------------------+ + | + v + +----------------------+ + | if (!counter) | + | free_resource(); | resource is freed + | cEEE | + +----------------------+ + +In most cases the `release_resource()` function will only free the +resource once. 
However, in the scenario above, if thread A is +preempted right after decrementing `counter` and thread B calls +`release_resource()` it will cause the resource to be freed. When +resumed, thread A will also free the resource since the counter value +is 0. + +To avoid race conditions the programmer must first identify the +critical section that can generate a race condition. The critical +section is the part of the code that reads and writes shared memory +from multiple parallel contexts. + +In the example above, the minimal critical section starts with +the counter decrement and ends with checking the counter's value. + +Once the critical section has been identified race conditions can be +avoided by using one of the following approaches: + +.. slide:: Avoiding race conditions + :inline-contents: True + :level: 2 + + * make the critical section **atomic** (e.g. use atomic + instructions) + + * **disable preemption** during the critical section (e.g. disable + interrupts, bottom-half handlers, or thread preemption) + + * **serialize the access** to the critical section (e.g. use spin + locks or mutexes to allow only one context or thread in the + critical section) + + + +Linux kernel concurrency sources +================================ + +There are multiple sources of concurrency in the Linux kernel that +depend on the kernel configuration as well as the type of system it +runs on: + + +.. slide:: Linux kernel concurrency sources + :inline-contents: True + :level: 2 + + * **single core systems**, **non-preemptive kernel**: the current + process can be preempted by interrupts + + * **single core systems**, **preemptive kernel**: above + the + current process can be preempted by other processes + + * **multi-core systems**: above + the current process can run + in parallel with another process or with an interrupt running on + another processor + +.. 
note:: We only discuss kernel concurrency and that is why a + non-preemptive kernel running on a single core system + has interrupts as the only source of concurrency. + + +Atomic operations +================= + +In certain circumstances we can avoid race conditions by using atomic +operations that are provided by hardware. Linux provides a unified API +to access atomic operations: + +.. slide:: Atomic operations + :inline-contents: True + :level: 2 + + * integer based: + + * simple: :c:func:`atomic_inc`, :c:func:`atomic_dec`, + :c:func:`atomic_add`, :c:func:`atomic_sub` + + * conditional: :c:func:`atomic_dec_and_test`, :c:func:`atomic_sub_and_test` + + * bit based: + + * simple: :c:func:`test_bit`, :c:func:`set_bit`, + :c:func:`change_bit` + + * conditional: :c:func:`test_and_set_bit`, :c:func:`test_and_clear_bit`, + :c:func:`test_and_change_bit` + +For example, we could use :c:func:`atomic_dec_and_test` to implement +the resource counter decrement and value checking atomically: + +.. slide:: Using :c:func:`atomic_dec_and_test` to implement resource counter release + :inline-contents: True + :level: 2 + + .. code-block:: c + + void release_resource() + { + if (atomic_dec_and_test(&counter)) + free_resource(); + } + + +One complication with atomic operations is encountered in +multi-core systems, where an atomic operation is no longer +atomic at the system level (but still atomic at the core level). + +To understand why, we need to decompose the atomic operation into memory +loads and stores. Then we can construct race condition scenarios where +the load and store operations are interleaved across CPUs, like in the +example below where incrementing a value from two processors will +produce an unexpected result: + +.. slide:: Atomic operations may not be atomic on SMP systems + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + + +------------+ + | Memory | + +-------------+ LOAD (0) | | +-------------+ + | CPU 0 |<--------------| v <- 0 | LOAD (0) | CPU 1 | + | | STORE (1) | |-------------->| | + | inc v |-------------->| v <- 1 | STORE (1) | inc v | + | cEEE | | v <- 1 |<--------------| cEEE | + +-------------+ | cEEE | +-------------+ + +------------+ + + +In order to provide atomic operations on SMP systems different +architectures use different techniques. For example, on x86 a LOCK +prefix is used to lock the system bus while executing the prefixed +operation: + +.. slide:: Fixing atomic operations for SMP systems (x86) + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + +------------+ + +-------------+ BUS LOCK | Memory | + | CPU 1 |<------------->| | + | | LOAD (0) | | + | inc v |<--------------| v <- 0 | + | | STORE (1) | | + | |-------------->| v <- 1 | + | | BUS UNLOCK | | + | cEEE |<------------->| | BUS LOCK +-------------+ + +-------------+ | |<------------->| CPU 1 | + | | LOAD (1) | | + | |<--------------| inc v | + | v <- 2 | STORE (2) | | + | |-------------->| | + | | BUS UNLOCK | | + | cEEE |<------------->| cEEE | + +------------+ +-------------+ + + +On ARM the LDREX and STREX instructions are used together to guarantee +atomic access: LDREX loads a value and signals the exclusive monitor +that an atomic operation is in progress. The STREX attempts to store a +new value but only succeeds if the exclusive monitor has not detected +other exclusive operations. So, to implement atomic operations the +programmer must retry the operation (both LDREX and STREX) until the +exclusive monitor signals a success. 
+ +Although they are often interpreted as "light" or "efficient" +synchronization mechanisms (because they "don't require spinning or +context switches", or because they "are implemented in hardware so +they must be more efficient", or because they "are just instructions +so they must have similar efficiency as other instructions"), as seen +from the implementation details, atomic operations are actually +expensive. + + +Disabling preemption (interrupts) +================================= + +On single core systems and non preemptive kernels the only source of +concurrency is the preemption of the current thread by an +interrupt. To prevent concurrency it is thus sufficient to disable +interrupts. + +This is done with architecture specific instructions, but Linux offers +architecture independent APIs to disable and enable interrupts: + +.. slide:: Synchronization with interrupts (x86) + :inline-contents: True + :level: 2 + + .. code-block:: c + + #define local_irq_disable() \ + asm volatile ("cli" : : : "memory") + + #define local_irq_enable() \ + asm volatile ("sti" : : : "memory") + + #define local_irq_save(flags) \ + asm volatile ("pushf ; pop %0" :"=g" (flags) + : /* no input */: "memory") \ + asm volatile("cli": : :"memory") + + #define local_irq_restore(flags) \ + asm volatile ("push %0 ; popf" + : /* no output */ + : "g" (flags) :"memory", "cc"); + + +Although the interrupts can be explicitly disabled and enabled with +:c:func:`local_irq_disable` and :c:func:`local_irq_enable` these APIs +should only be used when the current state of interrupts is +known. They are usually used in core kernel code (like interrupt +handling). + +For typical cases where we want to avoid interrupts due to concurrency +issues it is recommended to use the :c:func:`local_irq_save` and +:c:func:`local_irq_restore` variants. 
They take care of saving and +restoring the interrupts states so they can be freely called from +overlapping critical sections without the risk of accidentally +enabling interrupts while still in a critical section, as long as the +calls are balanced. + +Spin Locks +========== + +Spin locks are used to serialize access to a critical section. They +are necessary on multi-core systems where we can have true execution +parallelism. This is a typical spin lock implementation: + + +.. slide:: Spin Lock Implementation Example (x86) + :inline-contents: True + :level: 2 + + .. code-block:: asm + + spin_lock: + lock bts [my_lock], 0 + jc spin_lock + + /* critical section */ + + spin_unlock: + mov [my_lock], 0 + + **bts dts, src** - bit test and set; it copies the src bit from the dts + memory address to the carry flag and then sets it: + + .. code-block:: c + + CF <- dts[src] + dts[src] <- 1 + + +As it can be seen, the spin lock uses an atomic instruction to make +sure that only one core can enter the critical section. If there are +multiple cores trying to enter they will continuously "spin" until the +lock is released. + +While the spin lock avoids race conditions, it can have a significant +impact on the system's performance due to "lock contention": + + +.. slide:: Lock Contention + :inline-contents: True + :level: 2 + + * There is lock contention when at least one core spins trying to + enter the critical section lock + + * Lock contention grows with the critical section size, time spent + in the critical section and the number of cores in the system + + +Another negative side effect of spin locks is cache thrashing. + +.. slide:: Cache Thrashing + :inline-contents: True + :level: 2 + + Cache thrashing occurs when multiple cores are trying to read and + write to the same memory resulting in excessive cache misses. 
+ + Since spin locks continuously access memory during lock contention, + cache thrashing is a common occurrence due to the way cache + coherency is implemented. + + +Cache coherency in multi-processor systems +========================================== + +The memory hierarchy in multi-processor systems is composed of local +CPU caches (L1 caches), shared CPU caches (L2 caches) and the main +memory. To explain cache coherency we will ignore the L2 cache and +only consider the L1 caches and main memory. + +In the figure below we present a view of the memory hierarchy with two +variables A and B that fall into different cache lines and where +caches and the main memory are synchronized: + +.. slide:: Synchronized caches and memory + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + +-------+ +-------+ + | CPU 0 | | CPU 1 | + +-------+ +-------+ + cache cache + +-------+ +-------+ + A | 1 | | 1 | A + +-------+ +-------+ + B | 2 | | 2 | B + +-------+ +-------+ + memory + +-----------------------------+ + A | 1 | + +-----------------------------+ + B | 2 | + +-----------------------------+ + + +In the absence of a synchronization mechanism between the caches and +main memory, when CPU 0 executes `A = A + B` and CPU 1 executes `B = +A + B` we will have the following memory view: + +.. slide:: Unsynchronized caches and memory + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + +-------+ +-------+ + | CPU 0 | | CPU 1 | + +-------+ +-------+ + A <- A + B B <- A + B + + +-------+ +-------+ + A | 3 | | 1 | A + +-------+ +-------+ + B | 2 | | 3 | B + +-------+ +-------+ + write back caches + +-----------------------------+ + A | 1 | + +-----------------------------+ + B | 2 | + +-----------------------------+ + + +In order to avoid the situation above multi-processor systems use +cache coherency protocols. There are two main types of cache coherency +protocols: + +.. 
slide:: Cache Coherency Protocols + :inline-contents: True + :level: 2 + + * Bus snooping (sniffing) based: memory bus transactions are + monitored by caches and they take actions to preserve + coherency + + * Directory based: there is a separate entity (directory) that + maintains the state of caches; caches interact with the directory + to preserve coherency + + Bus snooping is simpler but it performs poorly when the number of + cores goes beyond 32-64. + + Directory based cache coherence protocols scale much better (up + to thousands of cores) and are usually used in NUMA systems. + + +A simple cache coherency protocol that is commonly used in practice is +MESI (named after the acronym of the cache line state names: +**Modified**, **Exclusive**, **Shared** and **Invalid**). Its main +characteristics are: + +.. slide:: MESI Cache Coherence Protocol + :inline-contents: True + :level: 2 + + * Caching policy: write back + + * Cache line states + + * Modified: owned by a single core and dirty + + * Exclusive: owned by a single core and clean + + * Shared: shared between multiple cores and clean + + * Invalid : the line is not cached + +Issuing read or write requests from CPU cores will trigger state +transitions, as exemplified below: + +.. slide:: MESI State Transitions + :inline-contents: True + :level: 2 + + * Invalid -> Exclusive: read request, all other cores have the line + in Invalid; line loaded from memory + + * Invalid -> Shared: read request, at least one core has the line + in Shared or Exclusive; line loaded from sibling cache + + * Invalid/Shared/Exclusive -> Modified: write request; **all + other** cores **invalidate** the line + + * Modified -> Invalid: write request from other core; line is + flushed to memory + + +.. note:: The most important characteristic of the MESI protocol is + that it is a write-invalidate cache protocol. When writing to a + shared location all other caches are invalidated. 
+ +This has an important performance impact in certain access patterns, and +one such pattern is contention for a simple spin lock implementation +like we discussed above. + +To exemplify this issue let's consider a system with three CPU cores, +where the first has acquired the spin lock and it is running the +critical section while the other two are spinning waiting to enter the +critical section: + +.. slide:: Cache thrashing due to spin lock contention + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + +-------+ +-------+ +-------+ + | CPU 0 |<---------------+ | CPU 1 | Invalidate | CPU 2 | + | cache |<-------------+ | | cache |<---+ +---------->| cache | + +-------+ Invalidate | | +-------+ | | +-------+ + | | | | + | | +----------------------------+ + spin_lock(&lock); | | | | + | | READ lock | | + | +---- WRITE lock ---+ | + | | + | READ lock | + +-------------------------------- WRITE lock ----+ + + ... ... ... + READ data READ lock READ lock + | | | + | | | + | | | + +------------------------------+-------------------------+ + | + v + + cache miss + +As it can be seen from the figure above due to the writes issued by +the cores spinning on the lock we see frequent cache line invalidate +operations which means that basically the two waiting cores will flush +and load the cache line while waiting for the lock, creating +unnecessary traffic on the memory bus and slowing down memory accesses +for the first core. + +Another issue is that most likely data accessed by the first CPU +during the critical section is stored in the same cache line with the +lock (common optimization to have the data ready in the cache after +the lock is acquired). Which means that the cache invalidation +triggered by the two other spinning cores will slow down the execution +of the critical section which in turn triggers more cache invalidate +actions. 
+ +Optimized spin locks +==================== + +As we have seen simple spin lock implementations can have poor +performance due to cache thrashing, especially as the number of +cores increases. To avoid this issue there are two possible strategies: + +* reduce the number of writes and thus reduce the number of cache + invalidate operations + +* avoid the other processors spinning on the same cache line, and thus + avoid the cache invalidate operations + + +An optimized spin lock implementation that uses the first approach is +presented below: + +.. slide:: Optimized spin lock (KeAcquireSpinLock) + :inline-contents: True + :level: 2 + + |_| + + .. code-block:: asm + + spin_lock: + rep ; nop + test lock_addr, 1 + jnz spin_lock + lock bts lock_addr + jc spin_lock + + + * we first test the lock read only, using non atomic + instructions, to avoid writes and thus invalidate operations + while we spin + + * only when the lock *might* be free, we try to acquire it + +The implementation also uses the **PAUSE** instruction to avoid +pipeline flushes due to (false positive) memory order violations and +to add a small delay (proportional to the memory bus frequency) to +reduce power consumption. + +A similar implementation with support for fairness (the CPU cores are +allowed in the critical section based on the time of arrival) is used +in the Linux kernel (the `ticket spin lock <https://lwn.net/Articles/267968/>`_) +for many architectures. + +However, for the x86 architecture, the current spin lock +implementation uses a queued spin lock where the CPU cores spin on +different locks (hopefully distributed in different cache lines) to +avoid cache invalidation operations: + +.. slide:: Queued Spin Locks + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + +-------------------------------------------+ + | Queued Spin Lock cEEE | + | | + | +---+ +---+ +---+ +---+ | + | | |----->| |----->| |----->| | | + | +---+ +---+ +---+ +---+ | + | ^ ^ ^ ^ | + | | | | | | + +-------------------------------------------+ + | | | | + CPU10 CPU17 CPU99 CPU0 + owns the spins on spins on spins on + lock private private private + lock lock lock + + + +Conceptually, when a new CPU core tries to acquire the lock and it +fails it will add its private lock to the list of waiting CPU +cores. When the lock owner exits the critical section it unlocks the +next lock in the list, if any. + +While a read spin optimized spin lock reduces most of the cache +invalidation operations, the lock owner can still generate cache +invalidate operations due to writes to data structures close to the +lock and thus part of the same cache line. This in turn generates +memory traffic on subsequent reads on the spinning cores. + +Hence, queued spin locks scale much better for large number of cores +as is the case for NUMA systems. And since they have similar fairness +properties as the ticket lock it is the preferred implementation on +the x86 architecture. + + +Process and Interrupt Context Synchronization +============================================= + +Accessing shared data from both process and interrupt context is a +relatively common scenario. On single core systems we can do this by +disabling interrupts, but that won't work on multi-core systems, +as we can have the process running on one CPU core and the interrupt +context running on a different CPU core. + +Using a spin lock, which was designed for multi-processor systems, +seems like the right solution, but doing so can cause common +deadlock conditions, as detailed by the following scenario: + + +.. 
slide:: Process and Interrupt Handler Synchronization Deadlock + :inline-contents: True + :level: 2 + + * In the process context we take the spin lock + + * An interrupt occurs and it is scheduled on the same CPU core + + * The interrupt handler runs and tries to take the spin lock + + * The current CPU will deadlock + + +To avoid this issue a two fold approach is used: + + +.. slide:: Interrupt Synchronization for SMP + :inline-contents: True + :level: 2 + + * In process context: disable interrupts and acquire a spin lock; + this will protect both against interrupt or other CPU cores race + conditions (:c:func:`spin_lock_irqsave` and + :c:func:`spin_unlock_irqrestore` combine the two operations) + + * In interrupt context: take a spin lock; this will protect + against race conditions with other interrupt handlers or process + context running on different processors + + +We have the same issue for other interrupt context handlers such as +softirqs, tasklets or timers and while disabling interrupts might +work, it is recommended to use dedicated APIs: + +.. slide:: Bottom-Half Synchronization for SMP + :inline-contents: True + :level: 2 + + * In process context use :c:func:`spin_lock_bh` (which combines + :c:func:`local_bh_disable` and :c:func:`spin_lock`) and + :c:func:`spin_unlock_bh` (which combines :c:func:`spin_unlock` and + :c:func:`local_bh_enable`) + + * In bottom half context use: :c:func:`spin_lock` and + :c:func:`spin_unlock` (or :c:func:`spin_lock_irqsave` and + :c:func:`spin_unlock_irqrestore` if sharing data with interrupt + handlers) + + +As mentioned before, another source of concurrency in the Linux kernel +can be other processes, due to preemption. + +.. slide:: Preemption + :inline-contents: True + :level: 2 + + |_| + + Preemption is configurable: when active it provides better latency + and response time, while when deactivated it provides better + throughput. 
+ + Preemption is disabled by spin locks and mutexes but it can be + manually disabled as well (by core kernel code). + + +As for local interrupt enabling and disabling APIs, the bottom half +and preemption APIs allow them to be used in overlapping critical +sections. A counter is used to track the state of bottom half and +preemption. In fact the same counter is used, with different increment +values: + +.. slide:: Preemption and Bottom-Half Masking + :inline-contents: True + :level: 2 + + .. code-block:: c + + #define PREEMPT_BITS 8 + #define SOFTIRQ_BITS 8 + #define HARDIRQ_BITS 4 + #define NMI_BITS 1 + + #define preempt_disable() preempt_count_inc() + + #define local_bh_disable() add_preempt_count(SOFTIRQ_OFFSET) + + #define local_bh_enable() sub_preempt_count(SOFTIRQ_OFFSET) + + #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) + + #define in_interrupt() irq_count() + + asmlinkage void do_softirq(void) + { + if (in_interrupt()) return; + ... + + +Mutexes +======= + +Mutexes are used to protect against race conditions from other CPU +cores but they can only be used in **process context**. As opposed to +spin locks, while a thread is waiting to enter the critical section it +will not use CPU time, but instead it will be added to a waiting queue +until the critical section is vacated. + +Since mutex and spin lock usage intersect, it is useful to compare +the two: + +.. slide:: Mutexes + :inline-contents: True + :level: 2 + + * They don't "waste" CPU cycles; system throughput is better than + spin locks if context switch overhead is lower than average + spinning time + + * They can't be used in interrupt context + + * They have a higher latency than spin locks + +Conceptually, the :c:func:`mutex_lock` operation is relatively simple: +if the mutex is not acquired we can take the fast path via an atomic +exchange operation: + + +.. slide:: :c:func:`mutex_lock` fast path + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + void __sched mutex_lock(struct mutex *lock) + { + might_sleep(); + + if (!__mutex_trylock_fast(lock)) + __mutex_lock_slowpath(lock); + } + + static __always_inline bool __mutex_trylock_fast(struct mutex *lock) + { + unsigned long curr = (unsigned long)current; + + if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr)) + return true; + + return false; + } + + +otherwise we take the slow path where we add ourselves to the mutex +waiting list and put ourselves to sleep: + +.. slide:: :c:func:`mutex_lock` slow path + :inline-contents: True + :level: 2 + + .. code-block:: c + + ... + spin_lock(&lock->wait_lock); + ... + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + ... + waiter.task = current; + ... + for (;;) { + if (__mutex_trylock(lock)) + goto acquired; + ... + spin_unlock(&lock->wait_lock); + ... + set_current_state(state); + spin_lock(&lock->wait_lock); + } + spin_lock(&lock->wait_lock); + acquired: + __set_current_state(TASK_RUNNING); + mutex_remove_waiter(lock, &waiter, current); + spin_unlock(&lock->wait_lock); + ... + +The full implementation is a bit more complex: instead of going to +sleep immediately it uses optimistic spinning if it detects that the lock +owner is currently running on a different CPU as chances are the owner +will release the lock soon. It also checks for signals and handles +mutex debugging for the locking dependency engine debug feature. + + +The :c:func:`mutex_unlock` operation is symmetric: if there are no +waiters on the mutex then we can take the fast path via an atomic exchange +operation: + +.. slide:: :c:func:`mutex_unlock` fast path + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + void __sched mutex_unlock(struct mutex *lock) + { + if (__mutex_unlock_fast(lock)) + return; + __mutex_unlock_slowpath(lock, _RET_IP_); + } + + static __always_inline bool __mutex_unlock_fast(struct mutex *lock) + { + unsigned long curr = (unsigned long)current; + + if (atomic_long_cmpxchg_release(&lock->owner, curr, 0UL) == curr) + return true; + + return false; + } + + void __mutex_lock_slowpath(struct mutex *lock) + { + ... + if (__mutex_waiter_is_first(lock, &waiter)) + __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); + ... + + +.. note:: Because :c:type:`struct task_struct` is cache aligned the 7 + lower bits of the owner field can be used for various flags, + such as :c:type:`MUTEX_FLAG_WAITERS`. + + +Otherwise we take the slow path where we pick up the first waiter from the +list and wake it up: + +.. slide:: :c:func:`mutex_unlock` slow path + :inline-contents: True + :level: 2 + + .. code-block:: c + + ... + spin_lock(&lock->wait_lock); + if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ + struct mutex_waiter *waiter; + waiter = list_first_entry(&lock->wait_list, struct mutex_waiter, + list); + next = waiter->task; + wake_q_add(&wake_q, next); + } + ... + spin_unlock(&lock->wait_lock); + ... + wake_up_q(&wake_q); + + + +Per CPU data +============ + +Per CPU data avoids race conditions by avoiding the use of shared +data. Instead, an array sized to the maximum possible number of CPU cores is +used and each core will use its own array entry to read and write +data. This approach certainly has advantages: + + +.. slide:: Per CPU data + :inline-contents: True + :level: 2 + + * No need to synchronize to access the data + + * No contention, no performance impact + + * Well suited for distributed processing where aggregation is only + seldom necessary (e.g. statistics counters) + + +Memory Ordering and Barriers +============================ + +Modern processors and compilers employ out-of-order execution to +improve performance. 
For example, processors can execute "future" +instructions while waiting for current instruction data to be fetched +from memory. + +Here is an example of out of order compiler generated code: + +.. slide:: Out of Order Compiler Generated Code + :inline-contents: True + :level: 2 + + +-------------------+-------------------------+ + | C code | Compiler generated code | + +-------------------+-------------------------+ + |.. code-block:: c |.. code-block:: asm | + | | | + | a = 1; | MOV R10, 1 | + | b = 2; | MOV R11, 2 | + | | STORE R11, b | + | | STORE R10, a | + +-------------------+-------------------------+ + + +.. note:: When executing instructions out of order the processor makes + sure that data dependency is observed, i.e. it won't execute + instructions whose input depends on the output of a previous + instruction that has not been executed. + +In most cases out of order execution is not an issue. However, in +certain situations (e.g. communicating via shared memory between +processors or between processors and hardware) we must issue some +instructions before others even without data dependency between them. + +For this purpose we can use barriers to order memory operations: + +.. slide:: Barriers + :inline-contents: True + :level: 2 + + * A read barrier (:c:func:`rmb()`, :c:func:`smp_rmb()`) is used to + make sure that no read operation crosses the barrier; that is, + all read operations before the barrier are complete before + executing the first instruction after the barrier + + * A write barrier (:c:func:`wmb()`, :c:func:`smp_wmb()`) is used to + make sure that no write operation crosses the barrier + + * A simple barrier (:c:func:`mb()`, :c:func:`smp_mb()`) is used + to make sure that no write or read operation crosses the barrier + + +Read Copy Update (RCU) +====================== + +Read Copy Update is a special synchronization mechanism similar to +read-write locks but with significant improvements over them (and some +limitations): + +.. 
slide:: Read Copy Update (RCU) + :level: 2 + :inline-contents: True + + * **Read-only** lock-less access at the same time with write access + + * Write accesses still require locks in order to avoid races + between writers + + * Requires unidirectional traversal by readers + + +In fact, the read-write locks in the Linux kernel have been deprecated +and then removed, in favor of RCU. + +Implementing RCU for a new data structure is difficult, but a few +common data structures (lists, queues, trees) do have RCU APIs that +can be used. + +RCU splits removal updates to the data structures in two phases: + +.. slide:: Removal and Reclamation + :inline-contents: True + :level: 2 + + * **Removal**: removes references to elements. Some old readers may + still see the old reference so we can't free the element. + + * **Reclamation**: free the element. This action is postponed until + all existing readers finish traversal (quiescent cycle). New + readers won't affect the quiescent cycle. + + +As an example, let's take a look at how to delete an element from a +list using RCU: + +.. slide:: RCU List Delete + :inline-contents: True + :level: 2 + + |_| + + .. ditaa:: + + (1) List Traversal (2) Removal + +-----------+ + +-----+ +-----+ +-----+ +-----+ | +-----+ | +-----+ + | | | | | | | | | | | | | | + | A |---->| B |---->| C | | A |--+ | B |--+->| C | + | | | | | | | | | | | | + +-----+ +-----+ +-----+ +-----+ +-----+ +-----+ + ^ ^ ^ ^ ^ ^ + | | | | | | + + + + + + + + (3) Quiescent cycle over (4) Reclamation + +-----------+ + +-----+ | +-----+ | +-----+ +-----+ +-----+ + | | | | | | | | | | | | + | A |--+ | B | +->| C | | A |---------------->| C | + | | | | | | | | | | + +-----+ +-----+ +-----+ +-----+ +-----+ + ^ ^ ^ ^ + | | | | + + +In the first step it can be seen that while readers traverse the list +all elements are referenced. In step two a writer removes +element B. Reclamation is postponed since there are still readers that +hold references to it. 
In step three a quiescent cycle just expired +and it can be noticed that there are no more references to +element B. Other elements still have references from readers that +started the list traversal after the element was removed. In step 4 we +finally perform reclamation (free the element). + + +Now that we covered how RCU functions at the high level, let's look at +the APIs for traversing the list as well as adding an element to and +removing an element from the list: + + +.. slide:: RCU list APIs cheat sheet + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* list traversal */ + rcu_read_lock(); + list_for_each_entry_rcu(i, head) { + /* no sleeping, blocking calls or context switch allowed */ + } + rcu_read_unlock(); + + + /* list element delete */ + spin_lock(&lock); + list_del_rcu(&node->list); + spin_unlock(&lock); + synchronize_rcu(); + kfree(node); + + /* list element add */ + spin_lock(&lock); + list_add_rcu(&node->list, head); + spin_unlock(&lock); + diff --git a/Documentation/teaching/lectures/syscalls.rst b/Documentation/teaching/lectures/syscalls.rst new file mode 100644 index 00000000000000..49d864f3e7324f --- /dev/null +++ b/Documentation/teaching/lectures/syscalls.rst @@ -0,0 +1,611 @@ +============ +System Calls +============ + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. slide:: System Calls + :inline-contents: True + :level: 2 + + * Linux system calls implementation + + * VDSO and virtual syscalls + + * Accessing user space from system calls + + + +Linux system calls implementation +================================= + +At a high level system calls are "services" offered by the kernel to +user applications and they resemble library APIs in that they are +described as a function call with a name, parameters, and return value. + +.. slide:: System Calls as Kernel services + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + +-------------+ +-------------+ + | Application | | Application | + +-------------+ +-------------+ + | | + |read(fd, buff, len) |fork() + | | + v v + +---------------------------------------+ + | Kernel | + +---------------------------------------+ + + +However, on a closer look, we can see that system calls are actually +not function calls, but specific assembly instructions (architecture +and kernel specific) that do the following: + +.. slide:: System Call Setup + :inline-contents: True + :level: 2 + + * setup information to identify the system call and its parameters + + * trigger a kernel mode switch + + * retrieve the result of the system call + +In Linux, system calls are identified by numbers and the parameters +for system calls are machine word sized (32 or 64 bit). There can be a +maximum of 6 system call parameters. Both the system call number and +the parameters are stored in certain registers. + +For example, on 32bit x86 architecture, the system call identifier is +stored in the EAX register, while parameters in registers EBX, ECX, +EDX, ESI, EDI, EBP. + +.. slide:: Linux system call setup + :inline-contents: False + :level: 2 + + * System calls are identified by numbers + + * The parameters for system calls are machine word sized (32 or 64 + bit) and they are limited to a maximum of 6 + + * Uses registers to store them both (e.g. for 32bit x86: EAX for + system call and EBX, ECX, EDX, ESI, EDI, EBP for parameters) + +System libraries (e.g. libc) offer functions that implement the +actual system calls in order to make it easier for applications to use +them. + +When a user to kernel mode transition occurs, the execution flow is +interrupted and it is transferred to a kernel entry point. This is +similar to how interrupts and exceptions are handled (in fact on some +architectures this transition happens as a result of an exception). 
+ +The system call entry point will save registers (which contains values +from user space, including system call number and system call +parameters) on stack and then it will continue with executing the +system call dispatcher. + +.. note:: During the user - kernel mode transition the stack is also + switched from the user stack to the kernel stack. This is + explained in more details in the interrupts lecture. + +.. slide:: Example of Linux system call setup and handling + :inline-contents: True + :level: 2 + + .. ditaa:: + + +-------------+ dup2 +-----------------------------+ + | Application |-----+ | libc | + +-------------+ | | | + +---->| C7590 dup2: | + | ... | + | C7592 movl 0x8(%esp),%ecx | + | C7596 movl 0x4(%esp),%ebx | + | C759a movl $0x3f,%eax | + +------------------------------+ C759f int $0x80 | + | | ... +<-----+ + | +-----------------------------+ | + | | + | | + | | + | | + | +------------------------------------------------------------+ | + | | Kernel | | + | | | | + +--->|ENTRY(entry_INT80_32) | | + | ASM_CLAC | | + | pushl %eax # pt_regs->orig_ax | | + | SAVE_ALL pt_regs_ax=$-ENOSYS # save rest | | + | ... | | + | movl %esp, %eax | | + | call do_int80_syscall_32 | | + | .... | | + | RESTORE_REGS 4 # skip orig_eax/error_code | | + | ... | | + | INTERRUPT_RETURN +-+ + +------------------------------------------------------------+ + + +The purpose of the system call dispatcher is to verify the system call +number and run the kernel function associated with the system call. + +.. slide:: Linux System Call Dispatcher + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + /* Handles int $0x80 */ + __visible void do_int80_syscall_32(struct pt_regs *regs) + { + enter_from_user_mode(); + local_irq_enable(); + do_syscall_32_irqs_on(regs); + } + + /* simplified version of the Linux x86 32bit System Call Dispatcher */ + static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) + { + unsigned int nr = regs->orig_ax; + + if (nr < IA32_NR_syscalls) + regs->ax = ia32_sys_call_table[nr](regs->bx, regs->cx, + regs->dx, regs->si, + regs->di, regs->bp); + syscall_return_slowpath(regs); + } + + + +To demonstrate the system call flow we are going to use the virtual +machine setup, attach gdb to a running kernel, add a breakpoint to the +dup2 system call and inspect the state. + +.. slide:: Inspecting dup2 system call + :inline-contents: True + :level: 2 + + |_| + + .. asciicast:: ../res/syscalls-inspection.cast + + +In summary, this is what happens during a system call: + +.. slide:: System Call Flow Summary + :inline-contents: True + :level: 2 + + * The application is setting up the system call number and + parameters and it issues a trap instruction + + * The execution mode switches from user to kernel; the CPU switches + to a kernel stack; the user stack and the return address to user + space is saved on the kernel stack + + * The kernel entry point saves registers on the kernel stack + + * The system call dispatcher identifies the system call function + and runs it + + * The user space registers are restored and execution is switched + back to user (e.g. calling IRET) + + * The user space application resumes + + +System call table +----------------- + +The system call table is what the system call dispatcher uses to map +system call numbers to kernel functions: + +.. slide:: System Call Table + :inline-contents: True + :level: 2 + + .. code-block:: c + + #define __SYSCALL_I386(nr, sym, qual) [nr] = sym, + + const sys_call_ptr_t ia32_sys_call_table[] = { + [0 ... 
__NR_syscall_compat_max] = &sys_ni_syscall, + #include + }; + + .. code-block:: c + + __SYSCALL_I386(0, sys_restart_syscall) + __SYSCALL_I386(1, sys_exit) + __SYSCALL_I386(2, sys_fork) + __SYSCALL_I386(3, sys_read) + __SYSCALL_I386(4, sys_write) + #ifdef CONFIG_X86_32 + __SYSCALL_I386(5, sys_open) + #else + __SYSCALL_I386(5, compat_sys_open) + #endif + __SYSCALL_I386(6, sys_close) + + + +System call parameters handling +------------------------------- + +Handling system call parameters is tricky. Since these values are +setup by user space, the kernel can not assume correctness and must +always verify them thoroughly. + +Pointers have a few important special cases that must be checked: + +.. slide:: System Calls Pointer Parameters + :inline-contents: True + :level: 2 + + * Never allow pointers to kernel-space + + * Check for invalid pointers + + +Since system calls are executed in kernel mode, they have access to +kernel space and if pointers are not properly checked user +applications might get read or write access to kernel space. + +For example, let's consider the case where such a check is not made for +the read or write system calls. If the user passes a kernel-space +pointer to a write system call then it can get access to kernel data +by later reading the file. If it passes a kernel-space pointer to a +read system call then it can corrupt kernel memory. + + +.. slide:: Pointers to Kernel Space + :level: 2 + + * User access to kernel data if allowed in a write system call + + * User corrupting kernel data if allowed in a read system call + + +Likewise, if a pointer passed by the application is invalid +(e.g. unmapped, read-only for cases where it is used for writing), it +could "crash" the kernel. Two approaches could be used: + +.. 
slide:: Invalid pointers handling approaches + :inline-contents: True + :level: 2 + + * Check the pointer against the user address space before using it, + or + + * Avoid checking the pointer and rely on the MMU to detect when the + pointer is invalid and use the page fault handler to determine + that the pointer was invalid + + +Although it sounds tempting, the second approach is not that easy to +implement. The page fault handler uses the fault address (the address +that was accessed), the faulting address (the address of the +instruction that did the access) and information from the user address +space to determine the cause: + +.. slide:: Page fault handling + :inline-contents: True + :level: 2 + + * Copy on write, demand paging, swapping: both the fault and + faulting addresses are in user space; the fault address is + valid (checked against the user address space) + + * Invalid pointer used in system call: the faulting address is + in kernel space; the fault address is in user space and it is + invalid + + * Kernel bug (kernel accesses invalid pointer): same as above + +But in the last two cases we don't have enough information to +determine the cause of the fault. + +In order to solve this issue, Linux uses special APIs (e.g. +:c:func:`copy_to_user`) to access user space that are specially +crafted: + +.. slide:: Marking kernel code that accesses user space + :inline-contents: True + :level: 2 + + * The exact instructions that access user space are recorded in a + table (exception table) + + * When a page fault occurs the faulting address is checked against + this table + + +Although the fault handling case may be more costly overall depending +on the address space vs exception table size, and it is more complex, +it is optimized for the common case and that is why it is preferred +and used in Linux. + + +..
slide:: Cost analysis for pointer checks vs fault handling + :inline-contents: True + :level: 2 + + +------------------+-----------------------+------------------------+ + | Cost | Pointer checks | Fault handling | + +==================+=======================+========================+ + | Valid address | address space search | negligible | + +------------------+-----------------------+------------------------+ + | Invalid address | address space search | exception table search | + +------------------+-----------------------+------------------------+ + + +Virtual Dynamic Shared Object (VDSO) +==================================== + +The VDSO mechanism was born out of the necessity of optimizing the +system call implementation, in a way that does not impact libc with +having to track the CPU capabilities in conjunction with the kernel +version. + +For example, x86 has two ways of issuing system calls: int 0x80 and +sysenter. The latter is significantly faster so it should be used when +available. However, it is only available for processors newer than +Pentium II and only for kernel versions greater than 2.6. + +With VDSO the system call interface is decided by the kernel: + +.. slide:: Virtual Dynamic Shared Object (VDSO) + :inline-contents: True + :level: 2 + + * a stream of instructions to issue the system call is generated by + the kernel in a special memory area (formatted as an ELF shared + object) + + * that memory area is mapped towards the end of the user address + space + + * libc searches for VDSO and if present will use it to issue the + system call + + +.. slide:: Inspecting VDSO + :inline-contents: True + :level: 2 + + |_| + + .. asciicast:: ../res/syscalls-vdso.cast + + + +An interesting development of the VDSO is the virtual system calls +(vsyscalls) which run directly from user space. 
These vsyscalls are +also part of VDSO and they are accessing data from the VDSO page that +is either static or modified by the kernel in a separate read-write +map of the VDSO page. Examples of system calls that can be implemented +as vsyscalls are: getpid or gettimeofday. + + +.. slide:: Virtual System Calls (vsyscalls) + :inline-contents: True + :level: 2 + + * "System calls" that run directly from user space, part of the VDSO + + * Static data (e.g. getpid()) + + * Dynamic data updated by the kernel in a RW map of the VDSO + (e.g. gettimeofday(), time()) + + +Accessing user space from system calls +====================================== + +As we mentioned earlier, user space must be accessed with special APIs +(:c:func:`get_user`, :c:func:`put_user`, :c:func:`copy_from_user`, +:c:func:`copy_to_user`) that check whether the pointer is in user space +and also handle the fault if the pointer is invalid. In case of invalid +pointers, they return a non-zero value. + +.. slide:: Accessing user space from system calls + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* OK: return -EFAULT if user_ptr is invalid */ + if (copy_from_user(&kernel_buffer, user_ptr, size)) + return -EFAULT; + + /* NOK: only works if user_ptr is valid otherwise crashes kernel */ + memcpy(&kernel_buffer, user_ptr, size); + + +Let's examine the simplest API, get_user, as implemented for x86: + +.. slide:: get_user implementation + :inline-contents: True + :level: 2 + + ..
code-block:: c + + #define get_user(x, ptr) \ + ({ \ + int __ret_gu; \ + register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ + __chk_user_ptr(ptr); \ + might_fault(); \ + asm volatile("call __get_user_%P4" \ + : "=a" (__ret_gu), "=r" (__val_gu), \ + ASM_CALL_CONSTRAINT \ + : "0" (ptr), "i" (sizeof(*(ptr)))); \ + (x) = (__force __typeof__(*(ptr))) __val_gu; \ + __builtin_expect(__ret_gu, 0); \ + }) + + +The implementation uses inline assembly, which allows inserting ASM +sequences in C code and also handles access to/from variables in the +ASM code. + +Based on the type size of the x variable, one of __get_user_1, +__get_user_2 or __get_user_4 will be called. Also, before executing +the assembly call, ptr will be moved to the first register EAX while +after the completion of the assembly part the value of EAX will be moved +to __ret_gu and the EDX register will be moved to __val_gu. + +It is equivalent to the following pseudo code: + + +.. slide:: get_user pseudo code + :inline-contents: True + :level: 2 + + .. code-block:: c + + #define get_user(x, ptr) \ + movl ptr, %eax \ + call __get_user_1 \ + movl %edx, x \ + movl %eax, result \ + + + +The __get_user_1 implementation for x86 is the following: + +.. slide:: get_user_1 implementation + :inline-contents: True + :level: 2 + + .. code-block:: none + + .text + ENTRY(__get_user_1) + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user + ASM_STAC + 1: movzbl (%_ASM_AX),%edx + xor %eax,%eax + ASM_CLAC + ret + ENDPROC(__get_user_1) + + bad_get_user: + xor %edx,%edx + mov $(-EFAULT),%_ASM_AX + ASM_CLAC + ret + END(bad_get_user) + + _ASM_EXTABLE(1b,bad_get_user) + +The first two statements check the pointer (which is stored in EAX) +with the addr_limit field of the current task (process) descriptor to +make sure that we don't have a pointer to kernel space.
+ +Then, SMAP is disabled, to allow access to user from kernel, and the +access to user space is done with the instruction at the 1: label. EAX +is then zeroed to mark success, SMAP is enabled, and the call returns. + +The movzbl instruction is the one that does the access to user space +and its address is captured with the 1: label and stored in a special +section: + +.. slide:: Exception table entry + :inline-contents: True + :level: 2 + + .. code-block:: c + + /* Exception table entry */ + # define _ASM_EXTABLE_HANDLE(from, to, handler) \ + .pushsection "__ex_table","a" ; \ + .balign 4 ; \ + .long (from) - . ; \ + .long (to) - . ; \ + .long (handler) - . ; \ + .popsection + + # define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + + +For each address that accesses user space we have an entry in the +exception table, that is made up of: the faulting address(from), where +to jump to in case of a fault, and a handler function (that implements +the jump logic). All of these addresses are stored on 32bit in +relative format to the exception table, so that they work for both 32 +and 64 bit kernels. + + +All of the exception table entries are then collected in the +__ex_table section by the linker script: + +.. slide:: Exception table building + :inline-contents: True + :level: 2 + + .. code-block:: c + + #define EXCEPTION_TABLE(align) \ + . = ALIGN(align); \ + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___ex_table) = .; \ + KEEP(*(__ex_table)) \ + VMLINUX_SYMBOL(__stop___ex_table) = .; \ + } + + +The section is guarded with __start___ex_table and __stop___ex_table +symbols, so that it is easy to find the data from C code. This table +is accessed by the fault handler: + + +.. slide:: Exception table handling + :inline-contents: True + :level: 2 + + .. 
code-block:: c + + bool ex_handler_default(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) + { + regs->ip = ex_fixup_addr(fixup); + return true; + } + + int fixup_exception(struct pt_regs *regs, int trapnr) + { + const struct exception_table_entry *e; + ex_handler_t handler; + + e = search_exception_tables(regs->ip); + if (!e) + return 0; + + handler = ex_fixup_handler(e); + return handler(e, regs, trapnr); + } + + +All it does is to set the return address to the one in the ``to`` field of +the exception table entry which, in case of the get_user exception +table entry, is bad_get_user which returns -EFAULT to the caller. + diff --git a/Documentation/teaching/lectures/virt.rst b/Documentation/teaching/lectures/virt.rst new file mode 100644 index 00000000000000..7804039114aa96 --- /dev/null +++ b/Documentation/teaching/lectures/virt.rst @@ -0,0 +1,651 @@ +============== +Virtualization +============== + +`View slides <virt-slides.html>`_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +Lecture objectives: +=================== + +.. slide:: Virtualization + :inline-contents: True + :level: 2 + + * Emulation basics + + * Virtualization basics + + * Paravirtualization basics + + * Hardware support for virtualization + + * Overview of the Xen hypervisor + + * Overview of the KVM hypervisor + + +Emulation basics +================ + +.. slide:: Emulation basics + :inline-contents: True + :level: 2 + + * Instructions are emulated (each time they are executed) + + * The other system components are also emulated: + + * MMU + + * Physical memory access + + * Peripherals + + * Target architecture - the architecture that is emulated + + * Host architecture - the architecture that the emulator runs on + + * For emulation, target and host architectures can be different + + +Virtualization basics +===================== + +..
slide:: Virtualization basics + :inline-contents: True + :level: 2 + + * Defined in a paper by Popek & Goldberg in 1974 + + * Fidelity + + * Performance + + * Security + + .. ditaa:: + + +----+ +----+ +----+ + | VM | | VM | ... | VM | + +----+ +----+ +----+ + + +-------------------------+ + | Virtual Machine Monitor | + +-------------------------+ + + +-------------------------+ + | Hardware | + +-------------------------+ + + +Classic virtualization +====================== + +.. slide:: Classic virtualization + :inline-contents: True + :level: 2 + + * Trap & Emulate + + * Same architecture for host and target + + * Most of the target instructions are natively executed + + * Target OS runs in non-privileged mode on the host + + * Privileged instructions are trapped and emulated + + * Two machine states: host and guest + + +Software virtualization +======================= + +.. slide:: Software virtualization + :inline-contents: True + :level: 2 + + * Not all architectures can be virtualized; e.g. x86: + + * CS register encodes the CPL + + * Some instructions don't generate a trap (e.g. popf) + + * Solution: emulate instructions using binary translation + + +MMU virtualization +================== + +.. slide:: MMU virtualization + :inline-contents: True + :level: 2 + + * "Fake" VM physical addresses are translated by the host to actual + physical addresses + + * Guest virtual address -> Guest physical address -> Host Physical Address + + * The guest page tables are not directly used by the host hardware + + * VM page tables are verified then translated into a new set of page + tables on the host (shadow page tables) + + +Shadow page tables +------------------ + +.. slide:: Shadow page tables + :inline-contents: True + :level: 2 + + |_| + + ..
ditaa:: + + PGD PMD PT + +----------+ +----------+ +----------+ + | | | | | | Guest Physical Page + +----------+ +----------+ +----------+ +----------+ + | | | | | |----+ | | + +-----+ +----------+ +----------+ +----------+ | | | + | CR3 | | |----+ | |---+ | | | | | + +-----+ +----------+ | +----------+ | +----------+ +--->+----------+ + | | | | | | | | | + +---------> +----------+ +------>+----------+ +---->+----------+ + Write Protected Write Protected Write Protected + | + | + Guest (VM) | + | trap access + | + ---------------------+------------------------------------------------------------------------------ + | + | check access, transform GPP to HPP + | + v + + Shadow PGD Shadow PMD Shadow PT + +----------+ +----------+ +----------+ + | | | | | | Host Physical Page + +----------+ +----------+ +----------+ +----------+ + | | | | | |----+ | | + +----------+ +----------+ +----------+ | | | + | |----+ | |---+ | | | | | + +----------+ | +----------+ | +----------+ +--->+----------+ + | | | | | | | | + +----------+ +------>+----------+ +---->+----------+ + + + +Lazy shadow sync +---------------- + +.. slide:: Lazy shadow sync + :inline-contents: True + :level: 2 + + * Guest page tables changes are typically batched + + * To avoid repeated traps, checks and transformations map guest + page table entries with write access + + * Update the shadow page table when + + * The TLB is flushed + + * In the host page fault handler + + +I/O emulation +============= + +.. slide:: I/O emulation + :inline-contents: True + :level: 2 + + |_| + + .. 
ditaa:: + + +---------------------+ + | Guest OS | + | +---------------+ | + | | Guest Driver | | + | +---------------+ | + | | ^ | + | | | | + +----+-----------+----+ + | trap | + | access | + +---+-----------+----+ + | | VMM | | + | v | | + | +----------------+ | + | | Virtual Device | | + | +----------------+ | + | | ^ | + | | | | + +--+------------+----+ + | | + v | + +-----------------+ + | Physical Device | + +-----------------+ + + +.. slide:: Example: qemu SiFive UART emulation + :inline-contents: True + :level: 2 + + .. literalinclude:: ../res/sifive_uart.c + :language: c + + +Paravirtualization +================== + +.. slide:: Paravirtualization + :inline-contents: True + :level: 2 + + * Change the guest OS so that it cooperates with the VMM + + * CPU paravirtualization + + * MMU paravirtualization + + * I/O paravirtualization + + * VMM exposes hypercalls for: + + * activate / deactivate the interrupts + + * changing page tables + + * accessing virtualized peripherals + + * VMM uses events to trigger interrupts in the VM + + +Intel VT-x +========== + +.. slide:: Intel VT-x + :inline-contents: True + :level: 2 + + + * Hardware extension to transform x86 to the point it can be + virtualized "classically" + + * New execution mode: non-root mode + + * Each non-root mode instance uses a Virtual Machine Control + Structure (VMCS) to store its state + + * VMM runs in root mode + + * VM-entry and VM-exit are used to transition between the two modes + + +Virtual Machine Control Structure +--------------------------------- + +.. slide:: Virtual Machine Control Structure + :inline-contents: True + :level: 2 + + * Guest information: state of the virtual CPU + + * Host information: state of the physical CPU + + * Saved information: + + * visible state: segment registers, CR3, IDTR, etc. + + * internal state + + * VMCS can not be accessed directly but certain information can be + accessed with special instructions + +VM entry & exit +--------------- + +.. 
slide:: VM entry & exit + :inline-contents: True + :level: 2 + + * VM entry - new instruction that switches the CPU to non-root + mode and loads the VM state from a VMCS; host state is saved in + VMCS + + * Allows injecting interrupts and exceptions in the guest + + * VM exit will be automatically triggered based on the VMCS + configuration + + * When VM exit occurs host state is loaded from VMCS, guest state + is saved in VMCS + +VM execution control fields +--------------------------- + +.. slide:: VM execution control fields + :inline-contents: True + :level: 2 + + * Select conditions which trigger a VM exit; examples: + + * If an external interrupt is generated + + * If an external interrupt is generated and EFLAGS.IF is set + + * If CR0-CR4 registers are modified + + * Exception bitmap - selects which exceptions will generate a VM + exit + + * IO bitmap - selects which I/O addresses (IN/OUT accesses) + generate a VM exit + + * MSR bitmaps - selects which RDMSR or WRMSR instructions will + generate a VM exit + + +Extended Page Tables +==================== + +.. slide:: Extended Page Tables + :inline-contents: True + :level: 2 + + * Reduces the complexity of MMU virtualization and improves + performance + + * Access to CR3, INVLPG and page faults do not require VM exit + anymore + + * The EPT page table is controlled by the VMM + + .. ditaa:: + + +-----+ +-----+ + | CR3 | | EPT | + +-----+ +-----+ + | +------------------+ | +----------------+ + | | | | | | + +--------> | Guest Page Table | +-------> | EPT Page Table | ---------------> + | | | | + ------------> +------------------+ ------------> +----------------+ + + Guest Virtual Guest Physical Host Physical + Address Address Address + + +VPID +---- + +..
slide:: VPID + :inline-contents: True + :level: 2 + + * VM entry and VM exit force a TLB flush - loses VMM / VM translations + + * To avoid this issue a VPID (Virtual Processor ID) tag is + associated with each VM (VPID 0 is reserved for the VMM) + + * All TLB entries are tagged + + * At VM entry and exit just the entries associated with the tags + are flushed + + * When searching the TLB just the current VPID is used + + +I/O virtualization +================== + + * Direct access to hardware from a VM - in a controlled fashion + + * Map the MMIO host directly to the guest + + * Forward interrupts + +.. slide:: I/O virtualization + :inline-contents: True + :level: 2 + + .. ditaa:: + + +---------------------+ +---------------------+ + | Guest OS | | Guest OS | + | +---------------+ | | +---------------+ | + | | Guest Driver | | | | Guest Driver | | + | +---------------+ | | +---------------+ | + | | ^ | | | ^ | + | | | | | | | | + +----+-----------+----+ +----+-----------+----+ + | traped | | mapped | + | access | | access | + +---+-----------+----+ +---+-----------+-----+ But how do we deal with DMA? + | | VMM | | | | VMM | | + | v | | | | | | + | +----------------+ | | | +---------+ | + | | Virtual Device | | | | | IRQ | | + | +----------------+ | | | | Mapping | | + | | ^ | | | +---------+ | + | | | | | | | | + +--+------------+----+ +---+-----------+-----+ + | | | | + v | v | + +-----------------+ +-----------------+ + | Physical Device | | Physical Device | + +-----------------+ +-----------------+ + +Instead of trapping MMIO as with emulated devices we can allow the +guest to access the MMIO directly by mapping through its page tables. + +Interrupts from the device are handled by the host kernel and a signal +is sent to the VMM which injects the interrupt to the guest just as +for the emulated devices. + + +.. slide:: I/O MMU + :inline-contents: True + :level: 2 + + VT-d protects and translates VM physical addresses using an I/O + MMU (DMA remapping) + + ..
ditaa:: + + +------+ +------+ + | | | | + | CPU | | DMA | + | | | | + +------+ +------+ + | + | + v + +-----+ +-----+ + | CR3 | | EPT | + +-----+ +-----+ + | +------------------+ | +----------------+ + | | | | | | + +--------> | Guest Page Table | +-------> | EPT Page Table | ---------------> + | | | | + ------------> +------------------+ ------------> +----------------+ + + Guest Virtual Guest Physical Host Physical + Address Address Address + + +.. slide:: Interrupt posting + :inline-contents: True + :level: 2 + + * Message Signaled Interrupts (MSI) = DMA writes to the host + address range of the IRQ controller (e.g. 0xFEExxxxx) + + * Low bits of the address and the data indicate which interrupt + vector to deliver to which CPU + + * Interrupt remapping table points to the virtual CPU (VMCS) that + should receive the interrupt + + * I/O MMU will trap the IRQ controller write and look it up in the + interrupt remapping table + + * if that virtual CPU is currently running it will take the + interrupt directly + + * otherwise a bit is set in a table (Posted Interrupt Descriptor + table) and the interrupt will be injected next time that vCPU is + run + + +.. slide:: I/O virtualization + :inline-contents: True + :level: 2 + + ..
ditaa:: + + +---------------------+ +---------------------+ +---------------------+ + | Guest OS | | Guest OS | | Guest OS | + | +---------------+ | | +---------------+ | | +---------------+ | + | | Guest Driver | | | | Guest Driver | | | | Guest Driver | | + | +---------------+ | | +---------------+ | | +---------------+ | + | | ^ | | | ^ | | | ^ | + | | | | | | | | | | | | + +----+-----------+----+ +----+-----------+----+ +----+-----------+----+ + | traped | | mapped | | mapped | interrupt + | access | | access | | access | posting + +---+-----------+----+ +---+-----------+-----+ +---+-----------+-----+ + | | VMM | | | | VMM | | | | VMM | | + | v | | | | | | | | | | + | +----------------+ | | | +---------+ | | | | | + | | Virtual Device | | | | | IRQ | | | | | | + | +----------------+ | | | | Mapping | | | | | | + | | ^ | | | +---------+ | | | | | + | | | | | | | | | | | | + +--+------------+----+ +---+-----------+-----+ +---+-----------+-----+ + | | | | | | + v | v | v | + +-----------------+ +-----------------+ +-----------------+ + | Physical Device | | Physical Device | | Physical Device | + +-----------------+ +-----------------+ +-----------------+ + + + +.. slide:: SR-IOV + :inline-contents: True + :level: 2 + + * Single Root - Input Output Virtualization + + * Physical device with multiple Ethernet ports will be shown as + multiple device on the PCI bus + + * Physical Function is used for the control and can be configured + + * to present itself as a new PCI device + + * which VLAN to use + + * The new virtual function is enumerated on the bus and can be + assigned to a particular guest + + +qemu +==== + +.. slide:: qemu + :inline-contents: True + :level: 2 + + * Uses binary translation via Tiny Code Generator (TCG) for + efficient emulation + + * Supports different target and host architectures (e.g. 
running + ARM VMs on x86) + + * Both process and full system level emulation + + * MMU emulation + + * I/O emulation + + * Can be used with KVM for accelerated virtualization + +KVM +=== + +.. slide:: KVM + :inline-contents: True + :level: 2 + + .. ditaa:: + + VM1 (qemu) VM2 (qemu) + +---------------------+ +---------------------+ + | +------+ +------+ | | +------+ +------+ | + | | App1 | | App2 | | | | App1 | | App2 | | + | +------+ +------+ | | +------+ +------+ | + | +-----------------+ | | +-----------------+ | + | | Guest Kernel | | | | Guest Kernel | | + | +-----------------+ | | +-----------------+ | + +---------------------+ +---------------------+ + + +----------------------------------------------------+ + | +-----+ | + | | KVM | Host Linux Kernel | + | +-----+ | + +----------------------------------------------------+ + + +----------------------------------------------------+ + | Hardware with virtualization support | + +----------------------------------------------------+ + + +.. slide:: KVM + :inline-contents: True + :level: 2 + + * Linux device driver for hardware virtualization (e.g. Intel VT-x, SVM) + + * IOCTL based interface for managing and running virtual CPUs + + * VMM components implemented inside the Linux kernel + (e.g. interrupt controller, timers) + + * Shadow page tables or EPT if present + + * Uses qemu or virtio for I/O virtualization + + + +Type 1 vs Type 2 Hypervisors +============================ + +.. slide:: Type 1 vs Type 2 Hypervisors + :inline-contents: True + :level: 2 + + * Type 1 = Bare Metal Hypervisor + + * Type 2 = Hypervisor embedded in an existing kernel / OS + + +Xen +=== + +.. slide:: Xen + :inline-contents: True + :level: 2 + + ..
image:: ../res/xen-overview.png diff --git a/Documentation/teaching/res/boot.cast b/Documentation/teaching/res/boot.cast new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Documentation/teaching/res/context_switch.cast b/Documentation/teaching/res/context_switch.cast new file mode 100644 index 00000000000000..d59458dceff98f --- /dev/null +++ b/Documentation/teaching/res/context_switch.cast @@ -0,0 +1,1055 @@ +{"version": 2, "width": 80, "height": 24, "timestamp": 1615893527, "idle_time_limit": 1.0, "env": {"SHELL": null, "TERM": "xterm"}} +[0.002326, "o", "$ "] +[1.9175, "o", "m"] +[1.959726, "o", "a"] +[2.032681, "o", "k"] +[2.164502, "o", "e"] +[2.232149, "o", " "] +[2.488152, "o", "g"] +[2.596492, "o", "d"] +[2.677323, "o", "b"] +[5.648402, "o", "\r\n"] +[5.657328, "o", "gdb -ex \"target remote localhost:1234\" /linux/vmlinux\r\n"] +[5.69046, "o", "\u001b[35;1m\u001b[35;1mGNU gdb \u001b[m\u001b[35;1m(Ubuntu 9.2-0ubuntu1~20.04) \u001b[m\u001b[35;1m9.2\u001b[m\u001b[35;1m\r\n\u001b[m\u001b[mCopyright (C) 2020 Free Software Foundation, Inc.\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\nType \"show copying\" and \"show warranty\" for details.\r\nThis GDB was configured as \"x86_64-linux-gnu\".\r\nType \"show configuration\" for configuration details.\r\nFor bug reporting instructions, please see:\r\n.\r\nFind the GDB manual and other documentation resources online at:\r\n .\r\n\r\nFor help, type \"help\".\r\nType \"apropos word\" to search for commands related to \"word\"...\r\n"] +[5.690654, "o", "Reading symbols from \u001b[32m/linux/vmlinux\u001b[m...\r\n"] +[6.256082, "o", "Remote debugging using localhost:1234\r\n"] +[6.268525, "o", "\u001b[33mdefault_idle\u001b[m () at \u001b[32march/x86/kernel/process.c\u001b[m:689\r\n689\t}\r\n"] +[6.269022, "o", "(gdb) "] +[8.116975, "o", "b"] +[8.192267, "o", "r"] 
+[8.249065, "o", "e"] +[8.289452, "o", "a"] +[8.360076, "o", "k"] +[8.473813, "o", " "] +[8.832714, "o", "_"] +[9.629251, "o", "_"] +[9.941666, "o", "s"] +[10.104279, "o", "w"] +[10.221431, "o", "i"] +[10.309944, "o", "t"] +[10.509059, "o", "c"] +[10.557026, "o", "h"] +[10.849393, "o", "_"] +[11.068795, "o", "t"] +[11.110265, "o", "o"] +[11.352371, "o", "_"] +[14.021585, "o", "a"] +[14.117609, "o", "s"] +[14.213449, "o", "m"] +[14.888578, "o", "\r\n"] +[14.9167, "o", "Breakpoint 1 at \u001b[34m0xc10018e8\u001b[m: file \u001b[32march/x86/entry/entry_32.S\u001b[m, line 765.\r\n(gdb) "] +[16.968107, "o", "c"] +[18.104467, "o", "\r\nContinuing.\r\n"] +[18.156456, "o", "\r\n"] +[18.156634, "o", "Breakpoint 1, \u001b[33m__switch_to_asm\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:765\r\n765\t\tpushl\t%ebp\r\n(gdb) "] +[21.008556, "o", "#"] +[21.237238, "o", " "] +[21.396102, "o", "l"] +[21.51189, "o", "e"] +[21.76495, "o", "t"] +[22.05648, "o", "s"] +[22.357544, "o", " "] +[24.804429, "o", "i"] +[25.30049, "o", "n"] +[25.476535, "o", "s"] +[25.664918, "o", "p"] +[25.773304, "o", "e"] +[25.860811, "o", "c"] +[26.128876, "o", "t"] +[26.278527, "o", " "] +[26.396147, "o", "t"] +[26.648694, "o", "h"] +[27.311874, "o", "e"] +[27.496339, "o", " "] +[31.342091, "o", "p"] +[31.483434, "o", "r"] +[31.554903, "o", "e"] +[31.796618, "o", "v"] +[32.029108, "o", " "] +[33.628667, "o", "t"] +[34.296017, "o", "\b\u001b[K"] +[34.841223, "o", "("] +[36.838321, "o", "\b\u001b[K"] +[40.880455, "o", "t"] +[40.999641, "o", "a"] +[41.19229, "o", "s"] +[41.299116, "o", "k"] +[42.109647, "o", " "] +[42.583652, "o", "("] +[43.015787, "o", "\b\u001b[K"] +[43.181496, "o", "-"] +[43.312415, "o", " "] +[43.501299, "o", "t"] +[43.544044, "o", "h"] +[43.673582, "o", "e"] +[43.728303, "o", " "] +[43.912104, "o", "t"] +[44.079964, "o", "a"] +[44.136162, "o", "s"] +[44.225298, "o", "k"] +[44.297573, "o", " "] +[44.424163, "o", "w"] +[44.539926, "o", "e"] +[44.701627, "o", " "] +[44.845224, 
"o", "a"] +[45.013358, "o", "r"] +[45.058936, "o", "e"] +[45.143809, "o", " "] +[45.243002, "o", "s"] +[45.400578, "o", "w"] +[45.479309, "o", "i"] +[45.607647, "o", "t"] +[45.838044, "o", "c"] +[45.888549, "o", "h"] +[45.951578, "o", "i"] +[46.061361, "o", "n"] +[46.116627, "o", "g"] +[46.260458, "o", " "] +[46.312755, "o", "a"] +[46.62442, "o", "w"] +[46.717299, "o", "a"] +[46.870336, "o", "y"] +[46.975698, "o", " "] +[47.14446, "o", "f"] +[47.209771, "o", "r"] +[47.259233, "o", "o"] +[47.311775, "o", "m"] +[47.840572, "o", "\r\n(gdb) "] +[50.231971, "o", "l"] +[50.440366, "o", "i"] +[50.495315, "o", "s"] +[50.719925, "o", "t"] +[50.82848, "o", " "] +[51.933215, "o", "7"] +[54.782257, "o", "6"] +[56.895922, "o", "0"] +[57.559601, "o", "\r\n"] +[57.561485, "o", "755\t/*\r\n756\t * %eax: prev task\r\n757\t * %edx: next task\r\n758\t */\r\n759\t.pushsection .text, \"ax\"\r\n760\tSYM_CODE_START(__switch_to_asm)\r\n761\t\t/*\r\n762\t\t * Save callee-saved registers\r\n763\t\t * This must match the order in struct inactive_task_frame\r\n764\t\t */\r\n(gdb) "] +[60.462285, "o", "p"] +[60.804466, "o", "r"] +[60.880276, "o", "i"] +[60.952589, "o", "n"] +[61.048481, "o", "t"] +[61.144253, "o", " "] +[61.567815, "o", "("] +[62.084353, "o", "("] +[62.340434, "o", "s"] +[62.521336, "o", "t"] +[62.584033, "o", "r"] +[62.681088, "o", "u"] +[62.808595, "o", "c"] +[63.020327, "o", "t"] +[63.157429, "o", " "] +[63.280568, "o", "t"] +[63.377159, "o", "a"] +[63.533418, "o", "s"] +[63.622482, "o", "k"] +[63.875748, "o", "_"] +[64.146633, "o", "s"] +[64.482186, "o", "t"] +[64.511947, "o", "r"] +[64.637404, "o", "u"] +[64.777177, "o", "c"] +[64.97617, "o", "t"] +[65.695387, "o", "*"] +[66.288011, "o", ")"] +[67.279895, "o", "$"] +[71.141799, "o", "e"] +[71.240296, "o", "a"] +[71.636944, "o", "x"] +[72.43762, "o", ")"] +[72.839998, "o", "-"] +[73.182685, "o", ">"] +[73.437056, "o", "c"] +[73.58784, "o", "o"] +[73.7922, "o", "m"] +[73.916696, "o", "m"] +[74.544734, "o", "\r\n"] 
+[74.575314, "o", "$1 = \"swapper/0\\000\\000\\000\\000\\000\\000\"\r\n(gdb) "] +[78.960006, "o", "#"] +[79.380986, "o", " "] +[80.31777, "o", "a"] +[80.439599, "o", "n"] +[80.535856, "o", "d"] +[80.628399, "o", " "] +[80.759967, "o", "l"] +[80.807871, "o", "e"] +[80.984042, "o", "t"] +[81.197923, "o", "s"] +[81.316977, "o", " "] +[81.439785, "o", "s"] +[81.749919, "o", "e"] +[81.884422, "o", "e"] +[82.073889, "o", " "] +[83.576034, "o", "t"] +[83.693926, "o", "h"] +[83.808938, "o", "e"] +[83.973244, "o", " "] +[84.221634, "o", "s"] +[85.29237, "o", "\b\u001b[K"] +[85.445562, "o", "\b\u001b[K"] +[85.567927, "o", "\b\u001b[K"] +[85.701674, "o", "\b\u001b[K"] +[85.824798, "o", "\b\u001b[K"] +[86.004056, "o", "t"] +[86.079743, "o", "o"] +[86.237188, "o", " "] +[86.652315, "o", "w"] +[86.749408, "o", "h"] +[86.808631, "o", "i"] +[86.917022, "o", "c"] +[86.98438, "o", "h"] +[87.06647, "o", " "] +[87.260113, "o", "t"] +[87.388902, "o", "a"] +[87.551541, "o", "s"] +[87.712082, "o", "k"] +[88.144914, "o", " "] +[88.411207, "o", "w"] +[88.511967, "o", "e"] +[88.6247, "o", " "] +[88.808452, "o", "a"] +[88.973655, "o", "r"] +[89.044472, "o", "e"] +[89.128021, "o", " "] +[89.293244, "o", "s"] +[89.485572, "o", "w"] +[89.59152, "o", "i"] +[90.069057, "o", "t"] +[90.356352, "o", "c"] +[90.426188, "o", "h"] +[90.524199, "o", "i"] +[90.623739, "o", "n"] +[90.712003, "o", "g"] +[90.831687, "o", " "] +[90.996124, "o", "t"] +[91.064549, "o", "o"] +[91.608395, "o", "\r\n"] +[91.608597, "o", "(gdb) "] +[92.768673, "o", "# and lets see to which task we are switching to"] +[93.125736, "o", "\b"] +[93.506024, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[9Pprint ((struct task_struct*)$eax)->comm"] +[93.752135, "o", "\b"] +[94.255802, "o", "\b"] +[94.2866, "o", "\b"] +[94.33478, "o", "\b"] +[94.36526, "o", "\b"] +[94.397811, "o", "\b"] +[94.428704, "o", "\b"] +[94.460327, "o", "\b"] +[94.495522, "o", "\b"] +[94.523042, "o", "\b"] +[94.554005, "o", "\b"] +[94.815994, "o", 
"\u001b[C"] +[94.981232, "o", "\u001b[C"] +[95.133933, "o", "\u001b[C"] +[97.174123, "o", "\b\u001b[1Px)->comm\b\b\b\b\b\b\b\b"] +[97.320654, "o", "dx)->comm\b\b\b\b\b\b\b\b"] +[98.368145, "o", "\r\n"] +[98.386055, "o", "$2 = \"kworker/0:1\\000\\000\\000\\000\"\r\n(gdb) "] +[109.290011, "o", "c"] +[110.600274, "o", "\r\nContinuing.\r\n"] +[110.611283, "o", "\r\n"] +[110.611546, "o", "Breakpoint 1, \u001b[33m__switch_to_asm\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:765\r\n765\t\tpushl\t%ebp\r\n(gdb) "] +[111.72507, "o", "c"] +[111.887716, "o", "\bprint ((struct task_struct*)$edx)->comm"] +[113.100353, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C# and lets see to which task we are switching to"] +[114.228918, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[9Pprint ((struct task_struct*)$edx)->comm"] +[115.280062, "o", "\r\n"] +[115.296922, "o", "$3 = \"swapper/0\\000\\000\\000\\000\\000\\000\"\r\n(gdb) "] +[116.415272, "o", "c"] +[116.912694, "o", "\r\n"] +[116.912781, "o", "Continuing.\r\n"] +[117.312405, "o", "\r\n"] +[117.313149, "o", "Breakpoint 1, \u001b[33m__switch_to_asm\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:765\r\n765\t\tpushl\t%ebp\r\n"] +[117.313648, "o", "(gdb) "] +[117.933355, "o", "c"] +[118.080051, "o", "\bprint ((struct task_struct*)$edx)->comm"] +[119.912687, "o", "\r\n"] +[119.92993, "o", "$4 = \"init\\000er/0\\000\\000\\000\\000\\000\\000\"\r\n(gdb) "] +[123.256997, "o", "print ((struct task_struct*)$edx)->comm"] +[123.876701, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[K"] +[124.45439, "o", "print ((struct task_struct*)$edx)->comm"] +[124.647655, "o", "\b"] +[124.80468, "o", "\b"] +[124.952888, "o", "\b"] +[125.110239, "o", "\b"] +[125.248208, "o", "\b"] +[125.396351, "o", "\b"] +[125.549064, "o", "\b"] +[125.688114, "o", "\b"] +[126.095543, "o", "\b\u001b[1Px)->comm\b\b\b\b\b\b\b\b"] +[126.191882, "o", "ax)->comm\b\b\b\b\b\b\b\b"] +[126.719944, "o", "\r\n"] 
+[126.736207, "o", "$5 = \"swapper/0\\000\\000\\000\\000\\000\\000\"\r\n(gdb) "] +[133.316754, "o", "#"] +[133.484351, "o", " "] +[133.66117, "o", "l"] +[133.824069, "o", "o"] +[133.949648, "o", "o"] +[134.022861, "o", "k"] +[134.17215, "o", "s"] +[134.223574, "o", " "] +[134.331832, "o", "l"] +[134.49252, "o", "i"] +[134.639694, "o", "k"] +[134.688238, "o", "e"] +[134.775713, "o", " "] +[134.879648, "o", "w"] +[134.951696, "o", "e"] +[135.033237, "o", " "] +[135.163839, "o", "a"] +[135.264608, "o", "r"] +[135.320082, "o", "e"] +[135.40093, "o", " "] +[135.535122, "o", "s"] +[135.725486, "o", "w"] +[135.816051, "o", "i"] +[135.927456, "o", "t"] +[136.131156, "o", "c"] +[136.202658, "o", "h"] +[136.256422, "o", "i"] +[136.396776, "o", "n"] +[136.460338, "o", "g"] +[136.580257, "o", " "] +[136.687682, "o", "f"] +[136.77995, "o", "r"] +[136.832341, "o", "o"] +[136.887861, "o", "m"] +[137.036266, "o", " "] +[137.135292, "o", "t"] +[137.247438, "o", "h"] +[137.356536, "o", "e"] +[137.439224, "o", " "] +[137.678756, "o", "s"] +[137.871289, "o", "w"] +[137.999837, "o", "a"] +[138.119198, "o", "p"] +[138.257533, "o", "p"] +[138.350743, "o", "e"] +[138.433485, "o", "r"] +[138.749519, "o", " "] +[139.888082, "o", "t"] +[140.087705, "o", "a"] +[140.119863, "o", "s"] +[140.276783, "o", "k"] +[140.435722, "o", " "] +[140.592133, "o", "t"] +[140.691411, "o", "o"] +[140.78898, "o", " "] +[140.873948, "o", "t"] +[141.005069, "o", "h"] +[141.055666, "o", "e"] +[141.191609, "o", " "] +[141.493029, "o", "i"] +[141.5594, "o", "n"] +[141.639238, "o", "i"] +[141.744796, "o", "t"] +[141.82812, "o", " "] +[141.927392, "o", "t"] +[142.103712, "o", "a"] +[142.179748, "o", "s"] +[142.452147, "o", "k"] +[143.168201, "o", "\r\n(gdb) "] +[146.056263, "o", "b"] +[146.12877, "o", "t"] +[148.33612, "o", "\r\n"] +[148.35078, "o", "#0 \u001b[33m__switch_to_asm\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:765\r\n"] +[148.351576, "o", "#1 \u001b[34m0xc15d8277\u001b[m in 
\u001b[33mcontext_switch\u001b[m (\u001b[36mrf\u001b[m=0xc17c9f04, \u001b[36mnext\u001b[m=, \u001b[m\r\n"] +[148.351914, "o", " \u001b[m\u001b[36mprev\u001b[m=0xc17d02c0 , \u001b[36mrq\u001b[m=0xcfdcb700) at \u001b[32mkernel/sched/core.c\u001b[m:3779\r\n"] +[148.352077, "o", "#2 \u001b[33m__schedule\u001b[m (\u001b[36mpreempt\u001b[m=, \u001b[36mpreempt@entry\u001b[m=false)\u001b[m\r\n"] +[148.352169, "o", " \u001b[m at \u001b[32mkernel/sched/core.c\u001b[m:4528\r\n#3 \u001b[34m0xc15d8a37\u001b[m in \u001b[33mschedule_idle\u001b[m () at \u001b[32mkernel/sched/core.c\u001b[m:4634\r\n"] +[148.358451, "o", "#4 \u001b[34m0xc108d8a5\u001b[m in \u001b[33mdo_idle\u001b[m () at \u001b[32mkernel/sched/idle.c\u001b[m:327\r\n"] +[148.365129, "o", "#5 \u001b[34m0xc108dbd5\u001b[m in \u001b[33mcpu_startup_entry\u001b[m (\u001b[36mstate=state@entry\u001b[m=CPUHP_ONLINE)\u001b[m\r\n \u001b[m at \u001b[32mkernel/sched/idle.c\u001b[m:395\r\n#6 \u001b[34m0xc15d6100\u001b[m in \u001b[33mrest_init\u001b[m () at \u001b[32minit/main.c\u001b[m:721\r\n#7 \u001b[34m0xc18c77de\u001b[m in \u001b[33march_call_rest_init\u001b[m () at \u001b[32minit/main.c\u001b[m:845\r\n"] +[148.36552, "o", "#8 \u001b[34m0xc18c7c30\u001b[m in \u001b[33mstart_kernel\u001b[m () at \u001b[32minit/main.c\u001b[m:1061\r\n"] +[148.367951, "o", "#9 \u001b[34m0xc18c7218\u001b[m in \u001b[33mi386_start_kernel\u001b[m () at \u001b[32march/x86/kernel/head32.c\u001b[m:56\r\n"] +[148.368485, "o", "#10 \u001b[34m0xc10001db\u001b[m in \u001b[33mstartup_32_smp\u001b[m () at \u001b[32march/x86/kernel/head_32.S\u001b[m:327\r\n"] +[148.36957, "o", "#11 \u001b[34m0x00000000\u001b[m in \u001b[33m??\u001b[m ()\r\n"] +[148.369972, "o", "(gdb) "] +[151.00407, "o", "#"] +[151.19242, "o", " "] +[151.460669, "o", "y"] +[151.535715, "o", "e"] +[151.62446, "o", "s"] +[151.759619, "o", ","] +[151.847569, "o", " "] +[152.111536, "o", "t"] +[152.207294, "o", "h"] +[152.251822, "o", "i"] +[152.375566, "o", "s"] +[152.468326, "o", " "] 
+[152.651638, "o", "l"] +[152.815662, "o", "o"] +[152.959592, "o", "o"] +[153.076986, "o", "k"] +[153.231758, "o", "s"] +[153.335568, "o", " "] +[153.503403, "o", "l"] +[153.639799, "o", "i"] +[153.814824, "o", "k"] +[153.892142, "o", "e"] +[154.011029, "o", " "] +[154.136079, "o", "t"] +[154.191539, "o", "h"] +[154.287617, "o", "e"] +[154.367043, "o", " "] +[154.535718, "o", "s"] +[154.719646, "o", "w"] +[154.838308, "o", "a"] +[155.020956, "o", "p"] +[155.404904, "o", "p"] +[155.536173, "o", "e"] +[155.653023, "o", "r"] +[155.768249, "o", " "] +[156.056207, "o", "t"] +[156.143169, "o", "a"] +[156.291919, "o", "s"] +[156.440453, "o", "k"] +[158.224694, "o", "\r\n(gdb) "] +[162.551586, "o", "s"] +[163.114829, "o", "t"] +[163.205076, "o", "e"] +[163.340616, "o", "p"] +[163.643258, "o", "i"] +[167.327688, "o", "\b\u001b[K"] +[167.839044, "o", "\b\u001b[K"] +[167.965402, "o", "\b\u001b[K"] +[168.113034, "o", "\b\u001b[K"] +[168.239474, "o", "\b\u001b[K"] +[168.484145, "o", "#"] +[168.703431, "o", " "] +[169.027423, "o", "l"] +[169.112353, "o", "e"] +[169.28797, "o", "t"] +[169.524584, "o", "s"] +[169.735299, "o", " "] +[171.135828, "o", "s"] +[171.4075, "o", "t"] +[171.464843, "o", "e"] +[171.808964, "o", "p"] +[172.115755, "o", " "] +[172.317145, "o", "a"] +[172.47342, "o", "n"] +[172.596959, "o", "d"] +[172.735489, "o", " "] +[172.927669, "o", "o"] +[173.042781, "o", "v"] +[173.108033, "o", "e"] +[173.907894, "o", "\b\u001b[K"] +[174.127765, "o", "b"] +[174.815899, "o", "\b\u001b[K"] +[174.939745, "o", "\b\u001b[K"] +[175.15, "o", "b"] +[175.428235, "o", "s"] +[175.59581, "o", "e"] +[175.723777, "o", "r"] +[175.97285, "o", "v"] +[176.055296, "o", "e"] +[176.248513, "o", " "] +[176.51233, "o", "t"] +[177.968133, "o", "h"] +[179.192669, "o", "e"] +[182.952071, "o", "\b\u001b[K"] +[183.087972, "o", "\b\u001b[K"] +[183.214224, "o", "\b\u001b[K"] +[183.448903, "o", "h"] +[183.488214, "o", "o"] +[183.551638, "o", "w"] +[183.707961, "o", " "] +[183.951723, "o", "t"] 
+[184.027742, "o", "h"] +[184.119558, "o", "e"] +[184.227577, "o", " "] +[184.428877, "o", "c"] +[184.515292, "o", "o"] +[184.599136, "o", "n"] +[184.695565, "o", "t"] +[184.751404, "o", "e"] +[184.975304, "o", "x"] +[185.18823, "o", "t"] +[185.287641, "o", " "] +[185.383712, "o", "s"] +[185.581109, "o", "w"] +[185.704285, "o", "i"] +[185.783127, "o", "t"] +[186.016302, "o", "c"] +[186.102309, "o", "h"] +[187.085348, "o", " "] +[187.500213, "o", "u"] +[187.57576, "o", "n"] +[187.815524, "o", "f"] +[188.02442, "o", "o"] +[188.199542, "o", "l"] +[188.533385, "o", "d"] +[188.621774, "o", "s"] +[189.632444, "o", "\r\n(gdb) "] +[190.303457, "o", "s"] +[190.479487, "o", "t"] +[190.544727, "o", "e"] +[190.591511, "o", "p"] +[190.703644, "o", "i"] +[191.007796, "o", "\r\n"] +[191.010816, "o", "766\t\tpushl\t%ebx\r\n(gdb) "] +[191.912524, "o", "\r\n"] +[191.915059, "o", "767\t\tpushl\t%edi\r\n(gdb) "] +[192.743936, "o", "\r\n"] +[192.746308, "o", "768\t\tpushl\t%esi\r\n(gdb) "] +[193.511536, "o", "\r\n"] +[193.514218, "o", "774\t\tpushfl\r\n(gdb) "] +[194.415367, "o", "\r\n"] +[194.41946, "o", "\u001b[33m__switch_to_asm\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:777\r\n777\t\tmovl\t%esp, TASK_threadsp(%eax)\r\n"] +[194.419691, "o", "(gdb) "] +[195.220364, "o", "#"] +[195.520461, "o", " "] +[195.799342, "o", "w"] +[195.935095, "o", "e"] +[196.088054, "o", " "] +[196.252345, "o", "s"] +[196.373061, "o", "a"] +[196.59163, "o", "v"] +[196.623491, "o", "e"] +[196.823595, "o", "d"] +[197.018563, "o", " "] +[197.143737, "o", "t"] +[197.274616, "o", "h"] +[197.332048, "o", "e"] +[197.472049, "o", " "] +[197.783684, "o", "r"] +[197.831704, "o", "e"] +[198.02048, "o", "g"] +[198.141339, "o", "i"] +[198.192092, "o", "s"] +[198.339912, "o", "t"] +[198.402788, "o", "e"] +[198.520477, "o", "r"] +[198.688044, "o", "s"] +[198.84898, "o", " "] +[199.00533, "o", "s"] +[199.107804, "o", "o"] +[199.279942, "o", " "] +[199.508566, "o", "f"] +[199.671398, "o", "a"] +[199.887722, 
"o", "r"] +[200.351521, "o", ","] +[200.463301, "o", " "] +[200.655968, "o", "n"] +[200.687803, "o", "e"] +[200.772872, "o", "x"] +[201.05564, "o", "t"] +[201.240283, "o", " "] +[201.524903, "o", "w"] +[201.61563, "o", "e"] +[201.751344, "o", " "] +[202.125757, "o", "s"] +[202.523685, "o", "\b\u001b[K"] +[202.64147, "o", "w"] +[202.727705, "o", "i"] +[202.903547, "o", "l"] +[203.045364, "o", "l"] +[203.156751, "o", " "] +[203.413128, "o", "s"] +[203.596934, "o", "w"] +[203.719746, "o", "i"] +[203.832585, "o", "t"] +[204.068109, "o", "c"] +[204.14267, "o", "h"] +[204.271578, "o", " "] +[204.320065, "o", "t"] +[204.455512, "o", "h"] +[204.523486, "o", "e"] +[204.657363, "o", " "] +[204.759158, "o", "t"] +[205.586833, "o", "\b\u001b[K"] +[205.648223, "o", "s"] +[205.768517, "o", "t"] +[205.840198, "o", "a"] +[206.035781, "o", "c"] +[206.047227, "o", "k"] +[206.687714, "o", "\r\n"] +[206.687766, "o", "(gdb) "] +[207.179955, "o", "s"] +[207.355887, "o", "t"] +[207.428741, "o", "e"] +[207.457028, "o", "p"] +[207.612888, "o", "i"] +[208.064286, "o", "\r\n"] +[208.067075, "o", "778\t\tmovl\tTASK_threadsp(%edx), %esp\r\n(gdb) "] +[209.407591, "o", "\r\n"] +[209.411316, "o", "\u001b[33m__switch_to_asm\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:781\r\n781\t\tmovl\tTASK_stack_canary(%edx), %ebx\r\n"] +[209.411612, "o", "(gdb) "] +[212.902993, "o", "#"] +[213.186849, "o", " "] +[213.319001, "o", "w"] +[213.390944, "o", "e"] +[213.535507, "o", " "] +[213.663195, "o", "a"] +[213.799039, "o", "r"] +[213.879251, "o", "e"] +[213.967477, "o", " "] +[214.079533, "o", "d"] +[214.155535, "o", "o"] +[214.332766, "o", "n"] +[214.399683, "o", "e"] +[214.55702, "o", " "] +[214.7656, "o", "s"] +[215.047766, "o", "w"] +[215.228474, "o", "i"] +[215.304639, "o", "t"] +[215.519221, "o", "c"] +[215.604247, "o", "h"] +[215.733286, "o", "i"] +[215.813391, "o", "n"] +[215.8956, "o", "g"] +[216.004892, "o", " "] +[216.079528, "o", "t"] +[216.183413, "o", "h"] +[216.267953, "o", "e"] 
+[216.399454, "o", " "] +[216.623103, "o", "s"] +[216.821001, "o", "t"] +[216.863454, "o", "a"] +[217.111792, "o", "c"] +[217.195888, "o", "k"] +[217.707786, "o", ","] +[217.787686, "o", " "] +[217.949878, "o", "l"] +[218.023149, "o", "e"] +[218.167388, "o", "t"] +[218.375101, "o", "s"] +[218.503646, "o", " "] +[218.671911, "o", "t"] +[218.735588, "o", "a"] +[218.871225, "o", "k"] +[218.982322, "o", "e"] +[219.068728, "o", " "] +[219.175086, "o", "a"] +[219.296026, "o", " "] +[219.596276, "o", "l"] +[219.771622, "o", "o"] +[219.901863, "o", "o"] +[219.967878, "o", "k"] +[220.087157, "o", " "] +[220.17509, "o", "a"] +[220.347339, "o", "t"] +[220.403464, "o", " "] +[220.519087, "o", "t"] +[220.612148, "o", "h"] +[220.67667, "o", "e"] +[220.772254, "o", " "] +[220.943396, "o", "b"] +[221.031202, "o", "r"] +[221.038285, "o", "a"] +[221.298946, "o", "c"] +[221.420949, "o", "k"] +[221.741402, "o", "\b\u001b[K"] +[221.876722, "o", "\b\u001b[K"] +[221.999662, "o", "\b\u001b[K"] +[222.119543, "o", "\b\u001b[K"] +[222.151268, "o", "a"] +[222.263857, "o", "c"] +[222.343319, "o", "k"] +[222.566421, "o", "t"] +[222.731723, "o", "r"] +[222.806406, "o", "a"] +[222.983386, "o", "c"] +[223.076776, "o", "e"] +[223.447917, "o", "\r\n(gdb) "] +[224.002251, "o", "b"] +[224.055592, "o", "t"] +[224.271867, "o", "\r\n"] +[224.272024, "o", "#0 \u001b[33m__switch_to_asm\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:781\r\n"] +[224.27223, "o", "#1 \u001b[34m0xc253ba58\u001b[m in \u001b[33m??\u001b[m ()\r\nBacktrace stopped: previous frame inner to this frame (corrupt stack?)\r\n(gdb) "] +[226.94333, "o", "#"] +[227.288215, "o", " "] +[227.551148, "o", "o"] +[227.683628, "o", "o"] +[227.839199, "o", "p"] +[228.980471, "o", "s"] +[229.095151, "o", ","] +[229.197165, "o", " "] +[229.364899, "o", "t"] +[229.447006, "o", "h"] +[229.543446, "o", "e"] +[229.643847, "o", " "] +[229.764396, "o", "d"] +[229.823982, "o", "e"] +[229.922185, "o", "b"] +[229.959231, "o", "u"] +[230.103636, 
"o", "g"] +[230.50335, "o", "g"] +[230.628726, "o", "e"] +[230.732769, "o", "r"] +[230.877045, "o", " "] +[230.996275, "o", "i"] +[231.107576, "o", "s"] +[231.210359, "o", " "] +[231.399261, "o", "c"] +[231.487397, "o", "o"] +[231.599424, "o", "n"] +[232.09977, "o", "f"] +[232.287115, "o", "u"] +[232.485493, "o", "s"] +[232.676843, "o", "e"] +[232.771224, "o", "d"] +[233.251928, "o", ","] +[233.424736, "o", " "] +[240.103124, "o", "w"] +[240.182868, "o", "e"] +[240.327268, "o", " "] +[240.486874, "o", "a"] +[240.66716, "o", "r"] +[240.729259, "o", "e"] +[240.868472, "o", " "] +[241.096347, "o", "m"] +[241.18101, "o", "i"] +[241.327393, "o", "s"] +[241.485639, "o", "s"] +[241.574748, "o", "i"] +[241.661021, "o", "n"] +[241.743456, "o", "g"] +[241.906569, "o", " "] +[242.12363, "o", "t"] +[242.244647, "o", "h"] +[242.335037, "o", "e"] +[242.496621, "o", " "] +[245.351862, "o", "\b\u001b[K"] +[245.476547, "o", "\b\u001b[K"] +[245.600162, "o", "\b\u001b[K"] +[245.733041, "o", "\b\u001b[K"] +[245.927573, "o", "p"] +[246.013118, "o", "a"] +[246.119622, "o", "r"] +[246.279117, "o", "t"] +[246.484973, "o", "s"] +[246.583587, "o", " "] +[246.695127, "o", "o"] +[246.770543, "o", "f"] +[246.911599, "o", " "] +[246.959314, "o", "t"] +[247.101026, "o", "h"] +[247.164598, "o", "e"] +[247.299367, "o", " "] +[248.091601, "o", "s"] +[248.319596, "o", "t"] +[248.391035, "o", "a"] +[248.991394, "o", "c"] +[249.100166, "o", "k"] +[250.031636, "o", " "] +[250.096947, "o", "f"] +[250.283625, "o", "r"] +[250.351461, "o", "a"] +[250.432813, "o", "m"] +[250.535577, "o", "e"] +[251.560407, "o", "\r\n(gdb) "] +[252.27138, "o", "#"] +[252.743479, "o", " "] +[254.959991, "o", "l"] +[254.983878, "o", "e"] +[255.167102, "o", "t"] +[255.355618, "o", "s"] +[255.519673, "o", " "] +[255.657043, "o", "c"] +[255.783342, "o", "o"] +[255.911076, "o", "n"] +[256.091637, "o", "t"] +[256.167365, "o", "i"] +[256.267678, "o", "n"] +[256.373344, "o", "u"] +[256.645139, "o", "e"] +[256.802641, "o", " "] 
+[256.975427, "o", "u"] +[257.06117, "o", "n"] +[257.231687, "o", "t"] +[257.423098, "o", "i"] +[257.528178, "o", "l"] +[257.874938, "o", " "] +[257.998964, "o", "w"] +[258.090668, "o", "e"] +[258.204414, "o", " "] +[258.39651, "o", "r"] +[258.444584, "o", "e"] +[258.566703, "o", "a"] +[258.759543, "o", "c"] +[258.862693, "o", "h"] +[259.109149, "o", " "] +[269.467858, "o", "a"] +[269.639412, "o", " "] +[269.847362, "o", "p"] +[270.032414, "o", "r"] +[270.096595, "o", "o"] +[270.245178, "o", "p"] +[270.383526, "o", "e"] +[270.448348, "o", "r"] +[270.579818, "o", " "] +[270.691356, "o", "f"] +[270.863693, "o", "u"] +[270.894962, "o", "n"] +[271.019515, "o", "c"] +[271.23123, "o", "t"] +[271.287233, "o", "i"] +[271.343717, "o", "o"] +[271.739795, "o", "n"] +[272.06029, "o", " "] +[272.580422, "o", "\b\u001b[K"] +[272.831445, "o", ","] +[272.951811, "o", " "] +[273.831788, "o", "e"] +[274.015133, "o", "."] +[274.207026, "o", "g"] +[274.284353, "o", "."] +[274.39683, "o", " "] +[275.196925, "o", "_"] +[275.339729, "o", "_"] +[275.831025, "o", "s"] +[276.015813, "o", "w"] +[276.180803, "o", "i"] +[276.331724, "o", "t"] +[276.534941, "o", "c"] +[276.566947, "o", "h"] +[276.864471, "o", "_"] +[277.055747, "o", "t"] +[277.103744, "o", "o"] +[277.879557, "o", "\r\n(gdb) "] +[278.37271, "o", "b"] +[278.477418, "o", "r"] +[278.524454, "o", "e"] +[278.590659, "o", "a"] +[278.684679, "o", "k"] +[278.775286, "o", " "] +[279.271091, "o", "_"] +[279.387961, "o", "_"] +[279.591305, "o", "s"] +[279.782144, "o", "w"] +[279.863553, "o", "i"] +[279.975196, "o", "t"] +[280.157072, "o", "c"] +[280.207462, "o", "h"] +[280.498546, "o", "_"] +[280.716367, "o", "t"] +[280.780306, "o", "o"] +[281.903272, "o", "\r\n"] +[281.920624, "o", "Breakpoint 2 at \u001b[34m0xc1020050\u001b[m: file \u001b[32march/x86/kernel/process_32.c\u001b[m, line 159.\r\n(gdb) "] +[285.580302, "o", "c"] +[287.023431, "o", "\r\nContinuing.\r\n"] +[287.025661, "o", "\r\n"] +[287.026259, "o", "Breakpoint 2, 
\u001b[33m__switch_to\u001b[m (\u001b[36mprev_p\u001b[m=0xc17d02c0 , \u001b[36mnext_p\u001b[m=0xc2530040)\u001b[m\r\n \u001b[m at \u001b[32march/x86/kernel/process_32.c\u001b[m:159\r\n159\t{\r\n"] +[287.026396, "o", "(gdb) "] +[289.067669, "o", "#"] +[289.21566, "o", " "] +[289.407583, "o", "n"] +[289.463508, "o", "o"] +[289.543951, "o", "w"] +[289.72241, "o", " "] +[289.894812, "o", "l"] +[290.019586, "o", "e"] +[290.175723, "o", "t"] +[290.42348, "o", "s"] +[290.636596, "o", " "] +[291.733293, "o", "t"] +[291.80722, "o", "a"] +[291.931757, "o", "k"] +[292.036902, "o", "e"] +[292.123167, "o", " "] +[292.211687, "o", "a"] +[292.316232, "o", " "] +[292.480027, "o", "l"] +[292.660474, "o", "o"] +[292.811518, "o", "o"] +[292.919126, "o", "k"] +[293.143171, "o", " "] +[293.27092, "o", "a"] +[293.503276, "o", "t"] +[293.610102, "o", " "] +[293.723124, "o", "t"] +[293.867511, "o", "h"] +[293.950794, "o", "e"] +[294.092621, "o", " "] +[294.354438, "o", "b"] +[294.80766, "o", "a"] +[294.983003, "o", "c"] +[295.43947, "o", "k"] +[295.612526, "o", "t"] +[295.771893, "o", "r"] +[295.860273, "o", "a"] +[296.024455, "o", "c"] +[296.080174, "o", "e"] +[296.207233, "o", " "] +[296.260988, "o", "a"] +[296.491858, "o", "g"] +[296.557449, "o", "a"] +[296.659441, "o", "i"] +[296.727329, "o", "n"] +[297.063689, "o", "\r\n(gdb) "] +[297.45931, "o", "b"] +[297.535189, "o", "t"] +[297.919758, "o", "\r\n"] +[297.920155, "o", "#0 \u001b[33m__switch_to\u001b[m (\u001b[36mprev_p\u001b[m=0xc17d02c0 , \u001b[36mnext_p\u001b[m=0xc2530040)\u001b[m\r\n \u001b[m at \u001b[32march/x86/kernel/process_32.c\u001b[m:159\r\n"] +[297.920545, "o", "#1 \u001b[34m0xc15d8277\u001b[m in \u001b[33mcontext_switch\u001b[m (\u001b[36mrf\u001b[m=0xc253ba3c, \u001b[36mnext\u001b[m=, \u001b[m\r\n"] +[297.920652, "o", " \u001b[m\u001b[36mprev\u001b[m=0xc2530040, \u001b[36mrq\u001b[m=0xcfdcb700) at \u001b[32mkernel/sched/core.c\u001b[m:3779\r\n"] +[297.920965, "o", "#2 \u001b[33m__schedule\u001b[m 
(\u001b[36mpreempt\u001b[m=, \u001b[36mpreempt@entry\u001b[m=false)\u001b[m\r\n \u001b[m at \u001b[32mkernel/sched/core.c\u001b[m:4528\r\n#3 \u001b[34m0xc15d86ce\u001b[m in \u001b[33mschedule\u001b[m () at \u001b[32mkernel/sched/core.c\u001b[m:4606\r\n"] +[297.927061, "o", "#4 \u001b[34m0xc15ddb66\u001b[m in \u001b[33mschedule_hrtimeout_range_clock\u001b[m (\u001b[36mexpires\u001b[m=, \u001b[m\r\n"] +[297.927149, "o", " \u001b[m\u001b[36mdelta\u001b[m=, \u001b[36mmode\u001b[m=HRTIMER_MODE_ABS, \u001b[36mclock_id\u001b[m=1)\u001b[m\r\n \u001b[m at \u001b[32mkernel/time/hrtimer.c\u001b[m:2139\r\n"] +[297.935662, "o", "#5 \u001b[34m0xc15ddc3b\u001b[m in \u001b[33mschedule_hrtimeout_range\u001b[m (\u001b[36mexpires=expires@entry\u001b[m=0xc253bb7c, \u001b[m\r\n \u001b[m\u001b[36mdelta\u001b[m=, \u001b[36mmode=mode@entry\u001b[m=HRTIMER_MODE_ABS)\u001b[m\r\n \u001b[m at \u001b[32mkernel/time/hrtimer.c\u001b[m:2184\r\n"] +[297.936228, "o", "#6 \u001b[34m0xc11c037f\u001b[m in \u001b[33mpoll_schedule_timeout\u001b[m (\u001b[36mpwq=pwq@entry\u001b[m=0xc253bb84, \u001b[m\r\n"] +[297.936671, "o", " \u001b[m\u001b[36mexpires=expires@entry\u001b[m=0xc253bb7c, \u001b[36mslack=slack@entry\u001b[m=4999986, \u001b[36mstate\u001b[m=1)\u001b[m\r\n"] +[297.936825, "o", " \u001b[m at \u001b[32mfs/select.c\u001b[m:243\r\n"] +[297.937767, "o", "#7 \u001b[34m0xc11c0baf\u001b[m in \u001b[33mdo_select\u001b[m (\u001b[36mn\u001b[m=, \u001b[36mn@entry\u001b[m=11, \u001b[m\r\n"] +[297.938173, "o", " \u001b[m\u001b[36mfds=fds@entry\u001b[m=0xc253be20, \u001b[36mend_time=end_time@entry\u001b[m=0xc253bf70)\u001b[m\r\n \u001b[m at \u001b[32mfs/select.c\u001b[m:603\r\n"] +[297.938674, "o", "#8 \u001b[34m0xc11c1985\u001b[m in \u001b[33mcore_sys_select\u001b[m (\u001b[36mn\u001b[m=, \u001b[36mn@entry\u001b[m=11, \u001b[m\r\n"] +[297.939277, "o", " \u001b[m\u001b[36minp=inp@entry\u001b[m=0xbf984f00, \u001b[36moutp=outp@entry\u001b[m=0x0, \u001b[36mexp\u001b[m=, \u001b[m\r\n"] +[297.939469, "o", " 
\u001b[m\u001b[36mexp@entry\u001b[m=0x0, \u001b[36mend_time\u001b[m=) at \u001b[32mfs/select.c\u001b[m:677\r\n"] +[297.940066, "o", "#9 \u001b[34m0xc11c1f64\u001b[m in \u001b[33mkern_select\u001b[m (\u001b[36mn\u001b[m=11, \u001b[36minp\u001b[m=0xbf984f00, \u001b[36moutp\u001b[m=0x0, \u001b[m\r\n"] +[297.94015, "o", "\u001b[m--Type for more, q to quit, c to continue without paging--"] +[299.775688, "o", "\r\n"] +[299.776122, "o", " \u001b[m\u001b[36mexp=exp@entry\u001b[m=0x0, \u001b[36mtvp\u001b[m=0xbf984df0) at \u001b[32mfs/select.c\u001b[m:718\r\n#10 \u001b[34m0xc11c1fe1\u001b[m in \u001b[33m__do_sys_select\u001b[m (\u001b[36mtvp\u001b[m=, \u001b[36mexp\u001b[m=, \u001b[m\r\n \u001b[m\u001b[36moutp\u001b[m=, \u001b[36minp\u001b[m=, \u001b[36mn\u001b[m=)\u001b[m\r\n \u001b[m at \u001b[32mfs/select.c\u001b[m:725\r\n"] +[299.77624, "o", "#11 \u001b[33m__se_sys_select\u001b[m (\u001b[36mtvp\u001b[m=, \u001b[36mexp\u001b[m=, \u001b[m\r\n \u001b[m\u001b[36moutp\u001b[m=, \u001b[36minp\u001b[m=, \u001b[36mn\u001b[m=)\u001b[m\r\n \u001b[m at \u001b[32mfs/select.c\u001b[m:722\r\n"] +[299.780969, "o", "#12 \u001b[33m__ia32_sys_select\u001b[m (\u001b[36mregs\u001b[m=) at \u001b[32mfs/select.c\u001b[m:722\r\n"] +[299.781337, "o", "#13 \u001b[34m0xc15d29cc\u001b[m in \u001b[33mdo_syscall_32_irqs_on\u001b[m (\u001b[36mnr\u001b[m=, \u001b[36mregs\u001b[m=0xc253bfb4)\u001b[m\r\n \u001b[m at \u001b[32march/x86/entry/common.c\u001b[m:77\r\n"] +[299.782024, "o", "#14 \u001b[33mdo_int80_syscall_32\u001b[m (\u001b[36mregs\u001b[m=0xc253bfb4) at \u001b[32march/x86/entry/common.c\u001b[m:94\r\n#15 \u001b[34m0xc15dfaeb\u001b[m in \u001b[33mentry_INT80_32\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1059\r\n"] +[299.782699, "o", "#16 \u001b[34m0x0000000b\u001b[m in \u001b[33m??\u001b[m ()\r\n"] +[299.783203, "o", "#17 \u001b[34m0xbf984f00\u001b[m in \u001b[33m??\u001b[m ()\r\nBacktrace stopped: previous frame inner to this frame (corrupt stack?)\r\n(gdb) "] +[303.942905, 
"o", "#"] +[304.103437, "o", " "] +[304.274144, "o", "o"] +[304.358619, "o", "k"] +[304.659941, "o", ","] +[304.766807, "o", " "] +[305.28809, "o", "t"] +[305.375297, "o", "h"] +[305.431933, "o", "i"] +[305.540524, "o", "s"] +[305.655296, "o", " "] +[305.901221, "o", "l"] +[306.115461, "o", "o"] +[306.246553, "o", "o"] +[306.32009, "o", "k"] +[306.460058, "o", "s"] +[306.542589, "o", " "] +[306.694869, "o", "l"] +[306.907465, "o", "i"] +[307.075817, "o", "k"] +[307.179414, "o", "e"] +[307.295136, "o", " "] +[307.54844, "o", "t"] +[307.663017, "o", "h"] +[307.815168, "o", "e"] +[308.156909, "o", " "] +[308.39174, "o", "i"] +[308.532433, "o", "n"] +[308.595757, "o", "i"] +[310.686866, "o", "t"] +[310.8524, "o", " "] +[310.953083, "o", "t"] +[311.220312, "o", "h"] +[311.340814, "o", "r"] +[311.470856, "o", "e"] +[311.548828, "o", "a"] +[311.734896, "o", "d"] +[313.767614, "o", "\r\n"] +[313.767976, "o", "(gdb) "] +[316.999241, "o", "#"] +[317.296435, "o", " "] +[317.548667, "o", "s"] +[317.607326, "o", "o"] +[317.763134, "o", " "] +[317.887136, "o", "t"] +[317.999336, "o", "h"] +[318.095442, "o", "e"] +[318.188349, "o", " "] +[318.387557, "o", "c"] +[318.475079, "o", "o"] +[318.700291, "o", "n"] +[318.776111, "o", "t"] +[318.855308, "o", "e"] +[319.046835, "o", "x"] +[319.271115, "o", "t"] +[319.381068, "o", " "] +[319.461433, "o", "s"] +[319.631028, "o", "w"] +[319.748625, "o", "i"] +[319.846916, "o", "t"] +[320.079731, "o", "c"] +[320.134923, "o", "h"] +[320.268, "o", " "] +[320.830957, "o", "h"] +[320.99934, "o", "a"] +[321.10033, "o", "s"] +[322.903464, "o", "\b\u001b[K"] +[323.407499, "o", "\b\u001b[K"] +[323.451112, "o", "\b\u001b[K"] +[323.482209, "o", "\b\u001b[K"] +[323.513329, "o", "\b\u001b[K"] +[326.052876, "o", "h"] +[326.718835, "o", " "] +[326.942968, "o", "0"] +[327.283417, "o", " "] +[327.524209, "o", "\b\u001b[K"] +[327.654149, "o", "\b\u001b[K"] +[328.142643, "o", "-"] +[328.332367, "o", " "] +[330.483798, "o", "o"] +[330.650327, "o", "r"] 
+[330.732245, "o", " "] +[330.873854, "o", "p"] +[330.918697, "o", "a"] +[331.030843, "o", "r"] +[331.223103, "o", "t"] +[331.335218, "o", " "] +[331.471149, "o", "o"] +[331.56318, "o", "f"] +[331.699593, "o", " "] +[332.264753, "o", "i"] +[332.407088, "o", "t"] +[332.812649, "o", ","] +[332.97567, "o", " "] +[333.070572, "o", "t"] +[333.252959, "o", "h"] +[333.375206, "o", "a"] +[333.596096, "o", "t"] +[333.942887, "o", " "] +[334.676468, "o", "k"] +[334.807218, "o", "e"] +[334.839089, "o", "r"] +[335.599008, "o", "\b\u001b[K"] +[336.056624, "o", "\b\u001b[K"] +[336.200204, "o", "\b\u001b[K"] +[337.431057, "o", "k"] +[337.554329, "o", "e"] +[337.579172, "o", "r"] +[337.725617, "o", "n"] +[338.559416, "o", "e"] +[338.622963, "o", "l"] +[338.726658, "o", " "] +[338.868311, "o", "s"] +[339.014698, "o", "t"] +[339.079695, "o", "a"] +[339.31094, "o", "c"] +[339.413228, "o", "k"] +[339.526949, "o", " "] +[340.127059, "o", "s"] +[340.291598, "o", "w"] +[340.402142, "o", "i"] +[340.49965, "o", "t"] +[340.729408, "o", "c"] +[340.783126, "o", "h"] +[341.207083, "o", ","] +[341.469127, "o", " "] +[342.923481, "o", "\b\u001b[K"] +[343.071348, "o", "\b\u001b[K"] +[343.39639, "o", " "] +[344.339672, "o", "i"] +[344.620158, "o", "s"] +[344.74322, "o", " "] +[344.874119, "o", "d"] +[344.950506, "o", "o"] +[345.047877, "o", "n"] +[345.113522, "o", "e"] +[345.694122, "o", "\r\n"] +[345.694186, "o", "(gdb) "] +[349.573599, "o", "quit\r\n"] +[349.574221, "o", "A debugging session is active.\r\n\r\n\tInferior 1 [process 1] will be detached.\r\n\r\nQuit anyway? 
(y or n) "] +[350.532606, "o", "y"] +[350.638505, "o", "\r\nDetaching from program: /linux/vmlinux, process 1\r\n"] +[350.639382, "o", "Ending remote debugging.\r\n[Inferior 1 (process 1) detached]\r\n"] +[350.646756, "o", "$ "] +[352.121258, "o", "\r\n"] diff --git a/Documentation/teaching/res/dma-remapping.png b/Documentation/teaching/res/dma-remapping.png new file mode 100755 index 00000000000000..a481116171e8a0 Binary files /dev/null and b/Documentation/teaching/res/dma-remapping.png differ diff --git a/Documentation/teaching/res/dts_node.png b/Documentation/teaching/res/dts_node.png new file mode 100644 index 00000000000000..2404462c518bc2 Binary files /dev/null and b/Documentation/teaching/res/dts_node.png differ diff --git a/Documentation/teaching/res/fib-trie-compressed.png b/Documentation/teaching/res/fib-trie-compressed.png new file mode 100755 index 00000000000000..44235ff58696f0 Binary files /dev/null and b/Documentation/teaching/res/fib-trie-compressed.png differ diff --git a/Documentation/teaching/res/fib-trie.png b/Documentation/teaching/res/fib-trie.png new file mode 100755 index 00000000000000..f0da22f1244da7 Binary files /dev/null and b/Documentation/teaching/res/fib-trie.png differ diff --git a/Documentation/teaching/res/fidb-details.png b/Documentation/teaching/res/fidb-details.png new file mode 100755 index 00000000000000..c146bc3c398bd7 Binary files /dev/null and b/Documentation/teaching/res/fidb-details.png differ diff --git a/Documentation/teaching/res/fidb-overview.png b/Documentation/teaching/res/fidb-overview.png new file mode 100755 index 00000000000000..a9f86970b2ce8f Binary files /dev/null and b/Documentation/teaching/res/fidb-overview.png differ diff --git a/Documentation/teaching/res/inspect_task_struct.cast b/Documentation/teaching/res/inspect_task_struct.cast new file mode 100644 index 00000000000000..52cb2df64dee93 --- /dev/null +++ b/Documentation/teaching/res/inspect_task_struct.cast @@ -0,0 +1,849 @@ +{"version": 2, "width": 
80, "height": 24, "timestamp": 1615763059, "idle_time_limit": 1.0, "env": {"SHELL": null, "TERM": "xterm"}} +[0.002258, "o", "$ "] +[0.627367, "o", "m"] +[0.68335, "o", "a"] +[0.786756, "o", "k"] +[0.835143, "o", "e"] +[0.987307, "o", " "] +[1.211073, "o", "g"] +[1.44347, "o", "d"] +[1.931409, "o", "b"] +[2.419393, "o", "\r\n"] +[2.425118, "o", "gdb -ex \"target remote localhost:1234\" /linux/vmlinux\r\n"] +[2.46016, "o", "\u001b[35;1m\u001b[35;1mGNU gdb \u001b[m\u001b[35;1m(Ubuntu 9.2-0ubuntu1~20.04) \u001b[m\u001b[35;1m9.2\u001b[m\u001b[35;1m\r\n\u001b[m\u001b[mCopyright (C) 2020 Free Software Foundation, Inc.\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\nType \"show copying\" and \"show warranty\" for details.\r\nThis GDB was configured as \"x86_64-linux-gnu\".\r\nType \"show configuration\" for configuration details.\r\nFor bug reporting instructions, please see:\r\n.\r\nFind the GDB manual and other documentation resources online at:\r\n .\r\n\r\nFor help, type \"help\".\r\nType \"apropos word\" to search for commands related to \"word\"...\r\n"] +[2.460424, "o", "Reading symbols from \u001b[32m/linux/vmlinux\u001b[m...\r\n"] +[3.047524, "o", "Remote debugging using localhost:1234\r\n"] +[3.067155, "o", "\u001b[33m__lock_acquire\u001b[m (\u001b[36mlock=lock@entry\u001b[m=0xc2416250, \u001b[36msubclass=subclass@entry\u001b[m=0, \u001b[m\r\n"] +[3.067502, "o", " \u001b[m\u001b[36mtrylock=trylock@entry\u001b[m=0, \u001b[36mread=read@entry\u001b[m=0, \u001b[36mcheck=check@entry\u001b[m=1, \u001b[m\r\n"] +[3.06766, "o", "\u001b[m--Type for more, q to quit, c to continue without paging--"] +[4.708007, "o", "\r\n"] +[4.708796, "o", " \u001b[m\u001b[36mhardirqs_off=hardirqs_off@entry\u001b[m=1, \u001b[36mnest_lock=nest_lock@entry\u001b[m=0x0, \u001b[m\r\n \u001b[m\u001b[36mip=ip@entry\u001b[m=3241676808, 
\u001b[36mreferences=references@entry\u001b[m=0, \u001b[m\r\n"] +[4.708863, "o", " \u001b[m\u001b[36mpin_count=pin_count@entry\u001b[m=0) at \u001b[32mkernel/locking/lockdep.c\u001b[m:4738\r\n"] +[4.709098, "o", "4738\t\tclass_idx = class - lock_classes;\r\n"] +[4.70932, "o", "(gdb) "] +[6.809114, "o", "l"] +[6.995472, "o", "x"] +[7.138947, "o", "-"] +[7.331348, "o", "p"] +[7.894323, "o", "s"] +[11.123106, "o", "\r\n"] +[11.123178, "o", " TASK PID COMM\r\n"] +[11.12485, "o", "0xc17d02c0 0 swapper/0\r\n"] +[11.126522, "o", "0xc2530040 1 swapper/0\r\n"] +[11.127872, "o", "0xc2534080 2 kthreadd\r\n"] +[11.129364, "o", "0xc25360c0 3 rcu_gp\r\n"] +[11.13074, "o", "0xc2537100 4 rcu_par_gp\r\n"] +[11.132056, "o", "0xc2545140 5 kworker/0:0\r\n"] +[11.133156, "o", "0xc2546180 6 kworker/0:0H\r\n"] +[11.134051, "o", "0xc25481c0 7 kworker/u2:0\r\n"] +[11.135046, "o", "0xc2549000 8 mm_percpu_wq\r\n"] +[11.135873, "o", "0xc254b040 9 ksoftirqd/0\r\n"] +[11.136804, "o", "0xc254c080 10 rcu_sched\r\n"] +[11.137649, "o", "0xc254e0c0 11 migration/0\r\n"] +[11.138414, "o", "0xc2572100 12 cpuhp/0\r\n"] +[11.139254, "o", "0xc2576140 13 kdevtmpfs\r\n"] +[11.140061, "o", "0xc2594180 14 netns\r\n"] +[11.140849, "o", "0xc26211c0 15 oom_reaper\r\n"] +[11.141609, "o", "0xc2623000 16 writeback\r\n"] +[11.142354, "o", "0xc26300c0 32 kblockd\r\n"] +[11.143193, "o", "\u001b[m--Type for more, q to quit, c to continue without paging--"] +[13.172107, "o", "q"] +[13.386945, "o", "\r\n"] +[13.387104, "o", "Quit\r\n(gdb) "] +[13.924017, "o", "#"] +[14.128394, "o", " "] +[14.691413, "o", "n"] +[14.777001, "o", "o"] +[15.024956, "o", "t"] +[15.200252, "o", "i"] +[15.658237, "o", "\b\u001b[K"] +[15.800549, "o", "\b\u001b[K"] +[18.149577, "o", "\b\u001b[K"] +[18.707978, "o", "\b\u001b[K"] +[18.992922, "o", "l"] +[19.107602, "o", "e"] +[19.28165, "o", "t"] +[19.505188, "o", "s"] +[19.600963, "o", " "] +[19.804348, "o", "l"] +[20.000551, "o", "o"] +[20.142601, "o", "o"] +[20.171715, "o", "k"] +[20.511567, 
"o", " "] +[20.691626, "o", "a"] +[20.998173, "o", "t"] +[21.083584, "o", " "] +[21.211514, "o", "t"] +[21.323576, "o", "h"] +[21.707293, "o", "e"] +[21.835442, "o", " "] +[23.451569, "o", "f"] +[23.621036, "o", "i"] +[23.690171, "o", "r"] +[23.8906, "o", "s"] +[24.099481, "o", "t"] +[24.459424, "o", " "] +[24.640336, "o", "t"] +[25.216437, "o", "a"] +[25.444316, "o", "s"] +[25.547244, "o", "k"] +[27.204116, "o", "\r\n(gdb) "] +[30.617187, "o", "p"] +[30.764631, "o", "r"] +[30.859261, "o", "i"] +[30.923074, "o", "n"] +[30.971535, "o", "t"] +[31.101722, "o", " "] +[31.377345, "o", "("] +[31.49992, "o", "s"] +[31.665037, "o", "t"] +[31.7159, "o", "r"] +[31.797039, "o", "u"] +[31.891209, "o", "c"] +[32.134894, "o", "t"] +[32.275529, "o", " "] +[32.38782, "o", "t"] +[32.593072, "o", "a"] +[32.670072, "o", "s"] +[32.723656, "o", "k"] +[32.97944, "o", "_"] +[33.411209, "o", "s"] +[33.571094, "o", "t"] +[33.633386, "o", "r"] +[33.723362, "o", "u"] +[33.819607, "o", "c"] +[34.03609, "o", "t"] +[34.243481, "o", " "] +[34.8279, "o", "*"] +[34.939317, "o", ")"] +[41.353684, "o", "0xc17d02"] +[41.353931, "o", "c0"] +[42.556284, "o", "\r\n"] +[42.572593, "o", "$1 = (struct task_struct *) \u001b[34m0xc17d02c0\u001b[m <\u001b[33minit_task\u001b[m>\r\n(gdb) "] +[43.76609, "o", " "] +[44.144961, "o", "\b\u001b[K"] +[44.404337, "o", "#"] +[44.491671, "o", " "] +[44.729417, "o", "n"] +[44.776973, "o", "o"] +[44.964074, "o", "t"] +[45.020048, "o", "i"] +[45.220228, "o", "c"] +[45.307108, "o", "e"] +[45.403404, "o", " "] +[45.579136, "o", "t"] +[45.651669, "o", "h"] +[45.752831, "o", "a"] +[45.851754, "o", "t"] +[45.932833, "o", " "] +[46.080052, "o", "t"] +[46.189538, "o", "h"] +[46.331405, "o", "e"] +[46.464752, "o", " "] +[46.737976, "o", "t"] +[47.031178, "o", "s"] +[47.034726, "o", "a"] +[47.488137, "o", "\b\u001b[K"] +[47.656792, "o", "\b\u001b[K"] +[47.731352, "o", "a"] +[47.842274, "o", "s"] +[47.879582, "o", "k"] +[48.063247, "o", " "] +[48.304771, "o", "i"] +[48.451198, "o", 
"s"] +[48.568817, "o", " "] +[48.675509, "o", "a"] +[48.836947, "o", "l"] +[48.985894, "o", "l"] +[49.180376, "o", "o"] +[49.335982, "o", "c"] +[49.427444, "o", "a"] +[49.635823, "o", "t"] +[49.691801, "o", "e"] +[49.881213, "o", "d"] +[49.964905, "o", " "] +[50.099764, "o", "d"] +[50.226974, "o", "i"] +[50.321136, "o", "r"] +[50.388547, "o", "e"] +[50.569383, "o", "c"] +[50.777146, "o", "t"] +[50.811594, "o", "l"] +[51.049026, "o", "y"] +[51.154598, "o", " "] +[51.316279, "o", "i"] +[51.381979, "o", "n"] +[51.432314, "o", " "] +[51.556065, "o", "t"] +[51.661287, "o", "h"] +[51.783469, "o", "e"] +[51.867781, "o", " "] +[52.096542, "o", "i"] +[52.241698, "o", "m"] +[52.379699, "o", "a"] +[52.616666, "o", "g"] +[52.684999, "o", "e"] +[54.076647, "o", "\r\n"] +[54.076706, "o", "(gdb) "] +[54.548115, "o", "#"] +[55.481235, "o", " "] +[55.929872, "o", "i"] +[56.729007, "o", "\b\u001b[K"] +[56.836286, "o", "t"] +[56.965048, "o", "h"] +[57.020301, "o", "i"] +[57.152388, "o", " "] +[57.361979, "o", "i"] +[57.484185, "o", "s"] +[58.104718, "o", "\b\u001b[K"] +[58.211525, "o", "\b\u001b[K"] +[58.345772, "o", "\b\u001b[K"] +[58.599277, "o", "s"] +[58.699061, "o", " "] +[58.872073, "o", "i"] +[59.024165, "o", "s"] +[59.121158, "o", " "] +[59.261122, "o", "t"] +[59.342248, "o", "h"] +[59.959054, "o", "e"] +[60.096489, "o", " "] +[60.195746, "o", "f"] +[60.316017, "o", "i"] +[60.40269, "o", "r"] +[60.595528, "o", "s"] +[60.795261, "o", "t"] +[60.875222, "o", " "] +[61.023882, "o", "t"] +[61.219665, "o", "a"] +[61.294759, "o", "s"] +[61.699194, "o", "k"] +[61.947333, "o", " "] +[62.186605, "o", "t"] +[62.292669, "o", "h"] +[62.383993, "o", "a"] +[62.462394, "o", "t"] +[62.611368, "o", " "] +[63.073466, "o", "r"] +[63.232473, "o", "u"] +[63.350378, "o", "n"] +[64.143825, "o", " "] +[64.346129, "o", "d"] +[64.457352, "o", "u"] +[64.594958, "o", "r"] +[64.667077, "o", "i"] +[64.739904, "o", "n"] +[64.819518, "o", "g"] +[64.932011, "o", " "] +[65.416451, "o", "b"] +[65.50703, "o", 
"o"] +[65.61947, "o", "o"] +[65.723128, "o", "t"] +[66.372533, "o", "\r\n"] +[66.37262, "o", "(gdb) "] +[67.489239, "o", "#"] +[67.69126, "o", " "] +[67.812288, "o", "a"] +[67.939709, "o", "l"] +[68.155397, "o", "s"] +[68.245108, "o", "o"] +[68.428805, "o", " "] +[68.619862, "o", "n"] +[68.642887, "o", "o"] +[68.779469, "o", "t"] +[68.88901, "o", "i"] +[69.043262, "o", "c"] +[69.136781, "o", "e"] +[69.281437, "o", " "] +[69.443731, "o", "t"] +[69.531328, "o", "h"] +[69.659822, "o", "a"] +[69.779992, "o", "t"] +[69.896515, "o", " "] +[70.019759, "o", "t"] +[70.13192, "o", "h"] +[70.217266, "o", "e"] +[70.315651, "o", "r"] +[70.40667, "o", "e"] +[70.467033, "o", " "] +[70.605909, "o", "i"] +[70.709012, "o", "s"] +[70.797622, "o", " "] +[70.954744, "o", "n"] +[71.032528, "o", "o"] +[71.337134, "o", " "] +[71.504564, "o", "i"] +[71.604101, "o", "n"] +[71.680536, "o", "i"] +[71.803535, "o", "t"] +[71.883327, "o", " "] +[72.003128, "o", "t"] +[72.203373, "o", "a"] +[72.295758, "o", "s"] +[72.399629, "o", "k"] +[72.575918, "o", " "] +[72.979851, "o", "y"] +[73.075825, "o", "e"] +[73.264132, "o", "t"] +[73.611752, "o", ","] +[73.708124, "o", " "] +[73.803574, "o", "s"] +[73.955095, "o", "i"] +[74.019777, "o", "n"] +[74.051016, "o", "c"] +[74.141735, "o", "e"] +[74.237855, "o", " "] +[74.355776, "o", "w"] +[74.420644, "o", "e"] +[74.515071, "o", " "] +[74.600642, "o", "a"] +[74.74771, "o", "r"] +[74.836435, "o", "e"] +[74.907223, "o", " "] +[75.018533, "o", "s"] +[75.192824, "o", "t"] +[75.235418, "o", "i"] +[75.404227, "o", "l"] +[75.518303, "o", "l"] +[75.571386, "o", " "] +[75.68698, "o", "b"] +[75.779911, "o", "o"] +[75.907538, "o", "o"] +[75.956027, "o", "t"] +[76.099746, "o", "i"] +[76.180981, "o", "n"] +[76.204263, "o", "g"] +[76.94757, "o", "\r\n(gdb) "] +[77.465313, "o", "#"] +[77.60323, "o", " "] +[77.771746, "o", "l"] +[77.840755, "o", "e"] +[78.01651, "o", "t"] +[78.19322, "o", "s"] +[78.28195, "o", " "] +[78.451553, "o", "w"] +[78.720865, "o", "a"] +[78.83536, 
"o", "i"] +[78.971292, "o", "t"] +[79.059111, "o", " "] +[79.184909, "o", "f"] +[79.268223, "o", "o"] +[79.37142, "o", "r"] +[79.467247, "o", " "] +[79.5852, "o", "a"] +[79.675245, "o", " "] +[79.827752, "o", "b"] +[79.944592, "o", "i"] +[80.027415, "o", "t"] +[80.187537, "o", "\r\n(gdb) "] +[80.562931, "o", "c"] +[80.795397, "o", "\r\nContinuing.\r\n"] +[121.082722, "o", "^C"] +[121.089999, "o", "\r\nProgram received signal SIGINT, Interrupt.\r\n"] +[121.090173, "o", "\u001b[33mdefault_idle\u001b[m () at \u001b[32march/x86/kernel/process.c\u001b[m:689\r\n689\t}\r\n(gdb) "] +[123.106153, "o", "#"] +[123.405941, "o", " "] +[123.979574, "o", "w"] +[124.068273, "o", "e"] +[124.159089, "o", " "] +[124.51651, "o", "h"] +[124.596335, "o", "i"] +[124.801487, "o", "t"] +[124.932552, "o", " "] +[125.043099, "o", "t"] +[125.187264, "o", "h"] +[125.342082, "o", "e"] +[125.551746, "o", " "] +[126.091803, "o", "i"] +[126.283016, "o", "d"] +[126.38368, "o", "e"] +[126.611562, "o", "l"] +[127.383539, "o", "\b\u001b[K"] +[127.523397, "o", "\b\u001b[K"] +[127.764257, "o", "l"] +[127.885901, "o", "e"] +[128.102388, "o", " "] +[128.628478, "o", "f"] +[128.755103, "o", "u"] +[128.827608, "o", "n"] +[128.899218, "o", "c"] +[129.159741, "o", "t"] +[129.229055, "o", "i"] +[129.266534, "o", "o"] +[129.52278, "o", "n"] +[129.603238, "o", " "] +[129.845, "o", "s"] +[129.95226, "o", "o"] +[130.635332, "o", " "] +[132.650977, "o", "t"] +[132.764212, "o", "h"] +[132.861951, "o", "e"] +[133.002149, "o", " "] +[133.108483, "o", "s"] +[133.264264, "o", "y"] +[133.32105, "o", "s"] +[133.512254, "o", "t"] +[133.577376, "o", "e"] +[133.610919, "o", "m"] +[133.724743, "o", " "] +[133.8519, "o", "p"] +[133.940929, "o", "r"] +[134.043108, "o", "o"] +[134.163485, "o", "b"] +[134.249323, "o", "a"] +[134.360582, "o", "b"] +[134.458498, "o", "l"] +[134.725093, "o", "y"] +[134.796654, "o", " "] +[135.075413, "o", "b"] +[135.132523, "o", "o"] +[135.253576, "o", "o"] +[135.420718, "o", "t"] +[135.572909, "o", 
"e"] +[135.732266, "o", "d"] +[135.88038, "o", " "] +[136.124706, "o", "u"] +[136.180361, "o", "p"] +[136.691338, "o", "\r\n(gdb) "] +[137.131348, "o", "l"] +[137.857637, "o", "s"] +[138.377273, "o", "-"] +[139.249675, "o", "\b\u001b[K"] +[139.352566, "o", "\b\u001b[K"] +[139.431785, "o", "x"] +[139.546652, "o", "-"] +[139.792902, "o", "p"] +[139.980251, "o", "s"] +[140.380458, "o", "\r\n"] +[140.380619, "o", " TASK PID COMM\r\n"] +[140.381038, "o", "0xc17d02c0 0 swapper/0\r\n"] +[140.382456, "o", "0xc2530040 1 init\r\n"] +[140.383557, "o", "0xc2534080 2 kthreadd\r\n"] +[140.384521, "o", "0xc25360c0 3 rcu_gp\r\n"] +[140.385521, "o", "0xc2537100 4 rcu_par_gp\r\n"] +[140.38645, "o", "0xc2545140 5 kworker/0:0\r\n"] +[140.387399, "o", "0xc2546180 6 kworker/0:0H\r\n"] +[140.388317, "o", "0xc25481c0 7 kworker/u2:0\r\n"] +[140.389206, "o", "0xc2549000 8 mm_percpu_wq\r\n"] +[140.390069, "o", "0xc254b040 9 ksoftirqd/0\r\n"] +[140.390903, "o", "0xc254c080 10 rcu_sched\r\n"] +[140.391626, "o", "0xc254e0c0 11 migration/0\r\n"] +[140.392543, "o", "0xc2572100 12 cpuhp/0\r\n"] +[140.393323, "o", "0xc2576140 13 kdevtmpfs\r\n"] +[140.394242, "o", "0xc2594180 14 netns\r\n"] +[140.395183, "o", "0xc26211c0 15 oom_reaper\r\n"] +[140.396009, "o", "0xc2623000 16 writeback\r\n"] +[140.39683, "o", "0xc26300c0 32 kblockd\r\n"] +[140.397681, "o", "\u001b[m--Type for more, q to quit, c to continue without paging--"] +[141.757204, "o", "q"] +[142.547532, "o", "\r\n"] +[142.547753, "o", "Quit\r\n(gdb) "] +[143.850961, "o", "#"] +[144.070245, "o", " "] +[144.195434, "o", "w"] +[144.279713, "o", "e"] +[144.408497, "o", " "] +[144.552695, "o", "d"] +[144.65939, "o", "o"] +[144.880631, "o", " "] +[145.079566, "o", "s"] +[145.195271, "o", "e"] +[145.368428, "o", "e"] +[145.607373, "o", "t"] +[145.79615, "o", " "] +[145.89913, "o", "t"] +[146.045195, "o", "h"] +[146.09891, "o", "e"] +[146.227914, "o", " "] +[146.463823, "o", "i"] +[146.543985, "o", "n"] +[146.618976, "o", "i"] +[146.707111, "o", "t"] 
+[146.83548, "o", " "] +[146.945151, "o", "t"] +[147.163454, "o", "a"] +[147.235326, "o", "s"] +[147.425145, "o", "k"] +[148.247683, "o", ","] +[148.376176, "o", " "] +[148.526531, "o", "s"] +[148.616468, "o", "o"] +[148.722313, "o", " "] +[148.904045, "o", "l"] +[148.97297, "o", "e"] +[149.130799, "o", "t"] +[149.367908, "o", "s"] +[149.544347, "o", " "] +[150.095879, "o", "\b\u001b[K"] +[150.228108, "o", "\b\u001b[K"] +[150.347046, "o", "\b\u001b[K"] +[151.10371, "o", "t"] +[151.28464, "o", "s"] +[151.378986, "o", " "] +[151.536173, "o", "i"] +[151.64024, "o", "n"] +[151.907639, "o", "s"] +[152.111461, "o", "p"] +[152.219417, "o", "e"] +[152.33727, "o", "c"] +[152.609681, "o", "t"] +[152.691632, "o", " "] +[152.883529, "o", "i"] +[153.039608, "o", "t"] +[154.483232, "o", "\r\n"] +[154.483296, "o", "(gdb) "] +[158.688904, "o", "\r\u001b[C\u001b[16@reverse-i-search)`':\u001b[C"] +[159.808627, "o", "\b\b\bp': # we do see the init task, so lets inspect it\b\b\b\b\b\b\b"] +[159.973821, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cr': # we hit the idle function so the system probably booted up\u001b[A\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[160.059576, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[17Pi': print (struct task_struct 
*)0xc17d02c0\r\n\r\u001b[K\u001b[A\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[160.123058, "o", "\b\b\b\u001b[1@n\u001b[C\u001b[C\u001b[C"] +[160.189924, "o", "\b\b\b\u001b[1@t\u001b[C\u001b[C\u001b[C"] +[160.92358, "o", "\r\u001b[C\u001b[21Pgdb)\u001b[C"] +[161.345471, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[162.195311, "o", "\b\b\b\b\b\b\b\b\b\b\u001b[K"] +[166.763546, "o", "0xc2530040"] +[167.811211, "o", "\r\n"] +[167.812061, "o", "$2 = (struct task_struct *) \u001b[34m0xc2530040\u001b[m\r\n(gdb) "] +[170.690863, "o", "print (struct task_struct *)0xc2530040"] +[170.960299, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[171.19283, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[171.767394, "o", "\u001b[C"] +[172.24119, "o", "\u001b[C\u001b[1@(\b"] +[172.752175, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[173.806767, "o", ")"] +[174.75601, "o", "-"] +[175.107407, "o", ">"] +[176.273234, "o", "i"] +[176.403267, "o", "n"] +[176.459331, "o", "i"] +[176.571028, "o", "t"] +[176.939329, "o", "\r\n"] +[176.949546, "o", "There is no member named init.\r\n(gdb) "] +[177.801366, "o", "print ((struct task_struct *)0xc2530040)->init"] +[178.822566, "o", "\b\u001b[K"] +[178.959575, "o", "\b\u001b[K"] +[179.083502, "o", "\b\u001b[K"] +[179.220157, "o", "\b\u001b[K"] +[179.455399, "o", "p"] +[179.55676, "o", "i"] 
+[179.637515, "o", "d"] +[179.812349, "o", "\r\n"] +[179.828661, "o", "$3 = 1\r\n(gdb) "] +[180.470819, "o", "print ((struct task_struct *)0xc2530040)->pid"] +[181.504206, "o", "\b\b\b\u001b[K"] +[182.272216, "o", "c"] +[182.336938, "o", "o"] +[182.535532, "o", "m"] +[182.634755, "o", "m"] +[183.843339, "o", "\r\n"] +[183.859914, "o", "$4 = \"init\\000er/0\\000\\000\\000\\000\\000\\000\"\r\n(gdb) "] +[185.336129, "o", "#"] +[185.499751, "o", " "] +[185.690685, "o", "l"] +[185.843492, "o", "e"] +[186.031657, "o", "t"] +[186.27479, "o", "s"] +[186.543226, "o", " "] +[186.736697, "o", "t"] +[186.907736, "o", "r"] +[187.010908, "o", "y"] +[187.22462, "o", " "] +[187.408223, "o", "t"] +[187.497188, "o", "o"] +[187.602223, "o", " "] +[187.680452, "o", "t"] +[188.29134, "o", "\b\u001b[K"] +[188.386986, "o", "c"] +[188.499959, "o", "h"] +[188.586567, "o", "a"] +[188.691207, "o", "n"] +[188.76476, "o", "g"] +[188.81181, "o", "e"] +[188.898677, "o", " "] +[189.035371, "o", "t"] +[189.092408, "o", "h"] +[189.223294, "o", "e"] +[189.304291, "o", " "] +[189.442809, "o", "n"] +[189.508884, "o", "a"] +[189.656275, "o", "m"] +[189.76372, "o", "e"] +[190.207667, "o", " "] +[190.430618, "o", "o"] +[190.533474, "o", "f"] +[190.650813, "o", " "] +[190.834946, "o", "i"] +[190.930808, "o", "n"] +[191.033332, "o", "i"] +[191.507208, "o", "\b\u001b[K"] +[191.632261, "o", "\b\u001b[K"] +[191.759484, "o", "\b\u001b[K"] +[191.875256, "o", "t"] +[191.955649, "o", "h"] +[192.066747, "o", "e"] +[192.139401, "o", " "] +[192.263673, "o", "i"] +[192.330469, "o", "n"] +[192.411275, "o", "i"] +[192.486134, "o", "t"] +[192.603117, "o", " "] +[192.760821, "o", "t"] +[193.200036, "o", "a"] +[193.312331, "o", "s"] +[193.394453, "o", "k"] +[193.64356, "o", "\r\n(gdb) "] +[194.059159, "o", "# lets try to change the name of the init task"] +[194.200533, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cprint ((struct task_struct *)0xc2530040)->comm"] +[194.696871, "o", 
"\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[195.001944, "o", "\u001b[1P"] +[195.130213, "o", "\u001b[1P"] +[195.25889, "o", "\u001b[1P"] +[195.371085, "o", "\u001b[1P"] +[195.467539, "o", "\u001b[1@s"] +[195.576466, "o", "\u001b[1@e"] +[195.674887, "o", "\u001b[C\u001b[1@t\b"] +[196.091385, "o", "\u001b[1P"] +[196.667968, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[197.372014, "o", "="] +[197.836566, "o", "\""] +[198.397868, "o", "m"] +[198.641201, "o", "y"] +[198.786448, "o", " "] +[199.202986, "o", "i"] +[199.328892, "o", "n"] +[199.399589, "o", "i"] +[199.546621, "o", "t"] +[199.953016, "o", "\""] +[200.507538, "o", "\r\n"] +[200.509222, "o", "(gdb) "] +[201.635061, "o", "l"] +[202.13135, "o", "x"] +[202.315347, "o", "-"] +[202.499865, "o", "p"] +[202.599957, "o", "s"] +[203.002948, "o", "\r\n"] +[203.003048, "o", " TASK PID COMM\r\n"] +[203.00358, "o", "0xc17d02c0 0 swapper/0\r\n"] +[203.005085, "o", "0xc2530040 1 my init\r\n"] +[203.006102, "o", "0xc2534080 2 kthreadd\r\n"] +[203.006929, "o", "0xc25360c0 3 rcu_gp\r\n"] +[203.007705, "o", "0xc2537100 4 rcu_par_gp\r\n"] +[203.008617, "o", "0xc2545140 5 kworker/0:0\r\n"] +[203.009353, "o", "0xc2546180 6 kworker/0:0H\r\n"] +[203.010181, "o", "0xc25481c0 7 kworker/u2:0\r\n"] +[203.011118, "o", "0xc2549000 8 mm_percpu_wq\r\n"] +[203.01204, "o", "0xc254b040 9 ksoftirqd/0\r\n"] +[203.012888, "o", "0xc254c080 10 rcu_sched\r\n"] +[203.013708, "o", "0xc254e0c0 11 migration/0\r\n"] +[203.014465, "o", "0xc2572100 12 cpuhp/0\r\n"] +[203.015189, "o", "0xc2576140 13 kdevtmpfs\r\n"] +[203.015958, "o", "0xc2594180 14 netns\r\n"] +[203.016734, "o", "0xc26211c0 15 oom_reaper\r\n"] +[203.017489, "o", "0xc2623000 16 
writeback\r\n"] +[203.018341, "o", "0xc26300c0 32 kblockd\r\n"] +[203.019184, "o", "\u001b[m--Type for more, q to quit, c to continue without paging--"] +[204.630436, "o", "q"] +[205.219018, "o", "\r\n"] +[205.21918, "o", "Quit\r\n(gdb) "] +[205.798038, "o", "#"] +[206.120012, "o", " "] +[206.779589, "o", "i"] +[206.88812, "o", "t"] +[206.992104, "o", " "] +[207.15961, "o", "l"] +[207.320891, "o", "o"] +[207.459826, "o", "o"] +[207.53892, "o", "k"] +[208.587042, "o", "s"] +[208.698803, "o", " "] +[208.831616, "o", "l"] +[209.003424, "o", "i"] +[209.131421, "o", "k"] +[209.216867, "o", "e"] +[209.312937, "o", " "] +[209.428128, "o", "i"] +[209.555208, "o", "t"] +[209.604274, "o", " "] +[209.818705, "o", "w"] +[209.898874, "o", "o"] +[210.09889, "o", "r"] +[210.579589, "o", "k"] +[210.714357, "o", "e"] +[210.840035, "o", "d"] +[210.966315, "o", ","] +[211.052435, "o", " "] +[211.231717, "o", "l"] +[211.310902, "o", "e"] +[211.454859, "o", "t"] +[211.664297, "o", "s"] +[211.723802, "o", " "] +[211.882806, "o", "v"] +[211.998969, "o", "e"] +[212.051506, "o", "r"] +[212.232489, "o", "i"] +[212.359529, "o", "f"] +[212.522225, "o", "y"] +[212.626748, "o", " "] +[212.871092, "o", "o"] +[212.971579, "o", "n"] +[213.059427, "o", " "] +[213.146803, "o", "t"] +[213.258418, "o", "h"] +[213.352468, "o", "e"] +[213.426922, "o", " "] +[213.58702, "o", "s"] +[213.627031, "o", "e"] +[213.712627, "o", "r"] +[213.770319, "o", "i"] +[213.875246, "o", "a"] +[213.970606, "o", "l"] +[214.04505, "o", " "] +[214.281377, "o", "t"] +[214.368606, "o", "e"] +[214.451128, "o", "r"] +[214.525736, "o", "m"] +[214.626505, "o", "i"] +[214.732978, "o", "n"] +[214.843079, "o", "a"] +[214.946548, "o", "l"] +[215.040742, "o", " "] +[215.129001, "o", "a"] +[215.228707, "o", "s"] +[215.440824, "o", " "] +[215.516404, "o", "w"] +[215.602238, "o", "e"] +[215.668078, "o", "l"] +[215.795141, "o", "l"] +[216.115205, "o", "\r\n(gdb) "] +[216.777487, "o", "quit\r\n"] +[216.777925, "o", "A debugging session is 
active.\r\n\r\n\tInferior 1 [process 1] will be detached.\r\n\r\nQuit anyway? (y or n) "] +[217.515133, "o", "y"] +[217.634805, "o", "\r\nDetaching from program: /linux/vmlinux, process 1\r\n"] +[217.635574, "o", "Ending remote debugging.\r\n"] +[217.635669, "o", "[Inferior 1 (process 1) detached]\r\n"] +[217.641009, "o", "make: *** [qemu/Makefile:54: gdb] Interrupt\r\n"] +[217.64117, "o", "\r\n$ "] +[219.594723, "o", "m"] +[219.722708, "o", "i"] +[219.798934, "o", "n"] +[219.896002, "o", "i"] +[219.930619, "o", "c"] +[220.032412, "o", "o"] +[220.090198, "o", "m"] +[220.218551, "o", "d"] +[220.306723, "o", " "] +[220.370933, "o", "-"] +[220.707384, "o", "D"] +[220.896138, "o", " "] +[221.002084, "o", "s"] +[221.096694, "o", "e"] +[221.183389, "o", "r"] +[221.794618, "o", "i"] +[221.921462, "o", "a"] +[221.996654, "o", "l"] +[222.162988, "o", "."] +[222.392328, "o", "p"] +[222.557563, "o", "t"] +[222.764497, "o", "s"] +[223.187221, "o", "\r\n"] +[223.187455, "o", "sh: 2: minicomd: not found\r\n$ "] +[224.66724, "o", "^[[A"] +[225.402255, "o", "\b \b"] +[225.554826, "o", "\b \b"] +[225.691215, "o", "\b \b\b \b"] +[226.362654, "o", "m"] +[226.467209, "o", "i"] +[226.562857, "o", "n"] +[226.611636, "o", "i"] +[227.616059, "o", "\t"] +[228.031284, "o", "\b\b"] +[228.626209, "o", "c"] +[228.648332, "o", "o"] +[228.698461, "o", "m"] +[228.962825, "o", " "] +[229.102646, "o", "-"] +[229.368508, "o", "D"] +[229.495185, "o", " "] +[229.758139, "o", "s"] +[229.818591, "o", "e"] +[229.898678, "o", "r"] +[229.974762, "o", "i"] +[230.066622, "o", "a"] +[230.152183, "o", "l"] +[230.320361, "o", "."] +[230.535407, "o", "p"] +[230.685811, "o", "t"] +[230.858324, "o", "r"] +[231.475343, "o", "\b \b"] +[231.555055, "o", "s"] +[232.066837, "o", "\r\n"] +[232.067795, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[0m\u001b(B\u001b[?1h\u001b=\u001b[H\u001b[2J"] +[232.068206, "o", "\u001b[?12l\u001b[?25h\nWelcome to minicom 2.7.1\r\n\nOPTIONS: I18n \r\nCompiled on Dec 23 2019, 
02:06:26.\r\nPort serial.pts, 23:04:03\r\n\nPress CTRL-A Z for help on special keys\r\n\n"] +[233.35075, "o", "\n"] +[233.351449, "o", "Poky (Yocto Project Reference Distro) 2.3 qemux86 /dev/hvc0"] +[233.351645, "o", "\r\n"] +[233.352178, "o", "\n"] +[233.352783, "o", "qemux86 login: "] +[234.133563, "o", "r"] +[234.195403, "o", "o"] +[234.32743, "o", "o"] +[234.389808, "o", "t"] +[234.499891, "o", "\r\n"] +[234.57783, "o", "root@qemux86:~# "] +[236.736246, "o", "p"] +[236.82051, "o", "s"] +[237.011059, "o", " "] +[237.771638, "o", "|"] +[238.06792, "o", " "] +[238.256099, "o", "g"] +[238.451007, "o", "r"] +[238.539927, "o", "e"] +[238.556716, "o", "p"] +[238.721872, "o", " "] +[238.931081, "o", "i"] +[238.997795, "o", "n"] +[239.083429, "o", "i"] +[239.163794, "o", "t"] +[239.752972, "o", "\r\n"] +[239.829798, "o", " 1 root 2004 S {my init} init [5]"] +[239.830096, "o", "\r\n"] +[239.831476, "o", " 233 root 2828 S grep init"] +[239.831842, "o", "\r\n"] +[239.837405, "o", "root@qemux86:~# "] +[241.836556, "o", "#"] +[242.119664, "o", " "] +[242.29265, "o", "l"] +[242.475695, "o", "o"] +[242.571459, "o", "o"] +[242.659603, "o", "k"] +[242.771906, "o", "s"] +[242.961525, "o", " "] +[242.961819, "o", "l"] +[243.115372, "o", "i"] +[243.267339, "o", "k"] +[243.363311, "o", "e"] +[243.507086, "o", " "] +[243.748592, "o", "i"] +[243.875193, "o", "t"] +[243.947777, "o", " "] +[244.060305, "o", "r"] +[244.234349, "o", "e"] +[244.234757, "o", "a"] +[244.279319, "o", "l"] +[244.391006, "o", "l"] +[244.507177, "o", "y"] +[244.603117, "o", " "] +[244.737167, "o", "w"] +[244.821576, "o", "o"] +[244.925326, "o", "r"] +[245.107148, "o", "k"] +[245.227926, "o", "e"] +[245.314038, "o", "d"] +[245.596368, "o", "!"] +[246.03658, "o", "\r\n"] +[246.038158, "o", "root@qemux86:~# "] +[246.855569, "o", "\u001b[0m\u001b(B\u001b[7m\u001b[20;1H\u001b[K\u001b[?12l\u001b[?25h\u001b[?25lCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7.1 | VT102 | Offline | 
al.pts\u001b[?12l\u001b[?25h\u001b[18;17H"] +[247.531022, "o", "\u001b[8;30H\u001b[?25l\u001b[0m\u001b(B\u001b(0lqqqqqqqqqqqqqqqqqqqqqqk\u001b[9;30Hx\u001b[0m\u001b(B Leave Minicom? \u001b[0m\u001b(B\u001b(0x\u001b[10;30Hx\u001b[0m\u001b(B No \u001b[0m\u001b(B\u001b(0x\u001b[11;30Hmqqqqqqqqqqqqqqqqqqqqqqj\u001b[10;51H\u001b[?25l\u001b[10;33H\u001b[0m\u001b(B\u001b[7m Yes "] +[249.067296, "o", "\u001b[?12l\u001b[?25h\u001b[8;1H\u001b[0m\u001b(BPress CTRL-A Z for help on special keys \u001b[9;1H \u001b[10;1H \u001b[11;1HPoky (Yocto Project Reference Distro) 2.3 qemux86 /de\u001b[18;17H\u001b[0m\u001b(B\u001b[7m\u001b[?12l\u001b[?25h"] +[249.067498, "o", "\u001b[?12l\u001b[?25h\u001b[0m\u001b(B\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1l\u001b>\u001b[!p\u001b[?3;4l\u001b[4l\u001b>"] +[249.067617, "o", "$ "] +[250.232829, "o", "\r\n"] diff --git a/Documentation/teaching/res/intr_x86.cast b/Documentation/teaching/res/intr_x86.cast new file mode 100644 index 00000000000000..d92b215e8c73c0 --- /dev/null +++ b/Documentation/teaching/res/intr_x86.cast @@ -0,0 +1,3663 @@ +{"version": 2, "width": 80, "height": 24, "timestamp": 1616275302, "idle_time_limit": 0.3, "env": {"SHELL": null, "TERM": "xterm"}} +[0.002597, "o", "$ "] +[1.210773, "o", "m"] +[1.257108, "o", "a"] +[1.329655, "o", "k"] +[1.444271, "o", "e"] +[2.225029, "o", " "] +[2.486972, "o", "g"] +[2.571123, "o", "d"] +[2.712604, "o", "b"] +[3.137145, "o", "\r\n"] +[3.142592, "o", "gdb -ex \"target remote localhost:1234\" /linux/vmlinux\r\n"] +[3.178567, "o", "\u001b[35;1m\u001b[35;1mGNU gdb \u001b[m\u001b[35;1m(Ubuntu 9.2-0ubuntu1~20.04) \u001b[m\u001b[35;1m9.2\u001b[m\u001b[35;1m\r\n\u001b[m\u001b[mCopyright (C) 2020 Free Software Foundation, Inc.\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\nType \"show copying\" and \"show warranty\" for details.\r\nThis GDB was configured as 
\"x86_64-linux-gnu\".\r\nType \"show configuration\" for configuration details.\r\nFor bug reporting instructions, please see:\r\n.\r\nFind the GDB manual and other documentation resources online at:\r\n .\r\n\r\nFor help, type \"help\".\r\nType \"apropos word\" to search for commands related to \"word\"...\r\n"] +[3.178839, "o", "Reading symbols from \u001b[32m/linux/vmlinux\u001b[m...\r\n"] +[3.823297, "o", "Remote debugging using localhost:1234\r\n"] +[3.83722, "o", "\u001b[34m0xc15dcb62\u001b[m in \u001b[33mdefault_idle\u001b[m () at \u001b[32m./arch/x86/include/asm/irqflags.h\u001b[m:60\r\n"] +[3.837438, "o", "60\t\tasm volatile(\"sti; hlt\": : :\"memory\");\r\n"] +[3.8378, "o", "(gdb) "] +[4.409342, "o", "#"] +[4.546448, "o", " "] +[4.688783, "o", "l"] +[4.769813, "o", "e"] +[4.896733, "o", "t"] +[5.08933, "o", "s"] +[5.136694, "o", " "] +[5.248955, "o", "i"] +[5.312794, "o", "n"] +[5.544699, "o", "s"] +[7.409078, "o", "p"] +[7.489474, "o", "e"] +[7.549338, "o", "c"] +[7.761283, "o", "t"] +[7.849505, "o", " "] +[7.969004, "o", "t"] +[8.040895, "o", "h"] +[8.152802, "o", "e"] +[8.224916, "o", " "] +[9.001867, "o", "i"] +[9.058176, "o", "n"] +[9.136243, "o", "t"] +[9.208632, "o", "e"] +[9.328647, "o", "r"] +[9.488109, "o", "r"] +[9.560597, "o", "u"] +[9.616715, "o", "p"] +[9.704462, "o", "t"] +[9.768614, "o", " "] +[9.976697, "o", "d"] +[10.145135, "o", "e"] +[10.289399, "o", "s"] +[10.328932, "o", "c"] +[10.52121, "o", "r"] +[10.592935, "o", "i"] +[10.656572, "o", "p"] +[10.772899, "o", "t"] +[10.880715, "o", "o"] +[10.98227, "o", "r"] +[11.128859, "o", " "] +[11.25683, "o", "t"] +[11.337146, "o", "a"] +[11.448688, "o", "b"] +[12.394915, "o", "l"] +[12.872587, "o", "e"] +[12.944672, "o", "\r\n"] +[12.944804, "o", "(gdb) "] +[15.485987, "o", "m"] +[15.546043, "o", "o"] +[15.688577, "o", "n"] +[15.73704, "o", "i"] +[15.832722, "o", "t"] +[15.928729, "o", "o"] +[15.993344, "o", "r"] +[16.057039, "o", " "] +[16.15264, "o", "i"] +[16.208593, "o", "n"] +[16.304808, 
"o", "f"] +[16.361885, "o", "o"] +[16.464359, "o", " "] +[16.577512, "o", "r"] +[16.655247, "o", "e"] +[16.817514, "o", "g"] +[17.064809, "o", "i"] +[17.297122, "o", "s"] +[17.903967, "o", "t"] +[18.064885, "o", "e"] +[18.144619, "o", "r"] +[18.312803, "o", "s"] +[18.392803, "o", "\r\n"] +[18.392969, "o", "EAX=00000000 EBX=00000000 ECX=ffffffff EDX"] +[18.39299, "o", "=0000"] +[18.393076, "o", "0000\r\r\nESI=00000000 EDI=00000000 EBP=c17cff1c ESP=c17cff18\r\r\nEIP=c15dcb62 EFL=002002"] +[18.393171, "o", "46 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=1\r\r\n"] +[18.393206, "o", "ES =007b 00000000 ffffffff 00cff300 DPL=3 DS [-WA]\r\r\nCS =0060 00000000 ffffffff 00cf9a00 DPL=0 CS32 [-R-]\r\r\nSS =0068 00000000 ffffffff 00cf9300 DPL=0 DS [-WA]\r\r\nDS =007b 00000000 ffffffff 00c"] +[18.393256, "o", "ff300 DPL=3 DS [-WA]\r\r\n"] +[18.393291, "o", "FS =00d8 0e47b000 ffffffff 008f9300 DPL=0 DS16 [-WA]\r\r\n"] +[18.393383, "o", "GS =00e0 cfdcb200 00000018 00409100 DPL=0 DS [--A]\r\r\nLDT=0000 00000000 00000000 00008200 DPL=0 LDT\r"] +[18.393415, "o", "\r\nTR =0080 ff806000 0000407b 00008900 DPL=0 T"] +[18.393444, "o", "SS32-avl\r\r\nGDT= ff801000 000000f"] +[18.393521, "o", "f\r\r\nIDT= ff800000 000007ff\r\r\nCR0=80050033 CR2=08087000 CR3=0"] +[18.393616, "o", "760b000 CR4=00000690\r\r\nDR0=00000000 DR1=00000000 "] +[18.393737, "o", "DR2=00000000 DR3=00000000 \r\r\nDR6=ffff0ff0 DR7=00000400\r\r\nEFER=0000000000000000\r\r\nFCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80\r\r\nFPR0=0000000000000000 0000 FPR1=0000000000000000 0000\r\r\nFPR2=0000000000000000 0000 FPR3=0000000000000000 0000\r\r\nFPR4=0000000000000000 0000 FPR"] +[18.393842, "o", "5=0000000000000000 0000\r\r\nFPR6=0000000000000000 0000 FPR7=0000000000000000 0000\r\r\nXMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000\r\r\nXMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000\r\r\nXMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000\r\r\n"] 
+[18.393943, "o", "XMM06=00000000000000000000000000000000 XMM07=00000000000000000000000000000000\r\r\n"] +[18.394056, "o", "(gdb) "] +[19.913615, "o", "s"] +[19.986157, "o", "e"] +[20.072668, "o", "t"] +[20.144869, "o", " "] +[20.312693, "o", "$"] +[21.121104, "o", "i"] +[21.208933, "o", "d"] +[21.448991, "o", "t"] +[21.608711, "o", "r"] +[21.849094, "o", "="] +[23.480497, "o", "0"] +[23.632689, "o", "x"] +[26.420845, "o", "ff800000"] +[27.416954, "o", "\r\n"] +[27.43416, "o", "(gdb) "] +[30.696589, "o", "#"] +[30.808186, "o", " "] +[30.960552, "o", "l"] +[31.040432, "o", "e"] +[31.168404, "o", "t"] +[31.344474, "o", "s"] +[31.440553, "o", " "] +[31.560787, "o", "l"] +[31.712465, "o", "o"] +[31.832592, "o", "o"] +[31.896851, "o", "k"] +[32.041676, "o", " "] +[32.224318, "o", "a"] +[32.432482, "o", "t"] +[32.52061, "o", " "] +[32.66456, "o", "t"] +[32.728814, "o", "h"] +[32.872205, "o", "e"] +[32.952481, "o", " "] +[33.048492, "o", "f"] +[33.136789, "o", "i"] +[33.216711, "o", "r"] +[35.611379, "o", "s"] +[35.94449, "o", "t"] +[36.241248, "o", " "] +[36.768841, "o", "e"] +[36.985022, "o", "n"] +[37.104804, "o", "t"] +[37.208381, "o", "r"] +[37.322459, "o", "y"] +[37.569352, "o", "\r\n"] +[37.569487, "o", "(gdb) "] +[53.808615, "o", "p"] +[53.97653, "o", "r"] +[54.409035, "o", "i"] +[54.528791, "o", "n"] +[54.664632, "o", "t"] +[54.816835, "o", " "] +[55.391071, "o", "("] +[56.353133, "o", "\b\u001b[K"] +[56.705533, "o", "*"] +[56.984674, "o", "("] +[57.401646, "o", "u"] +[57.598027, "o", "i"] +[57.661018, "o", "n"] +[57.696843, "o", "t"] +[58.032644, "o", "4"] +[58.392875, "o", "_"] +[58.694782, "o", "\b\u001b[K"] +[58.794268, "o", "\b\u001b[K"] +[58.858317, "o", "6"] +[58.912611, "o", "4"] +[59.10469, "o", "_"] +[59.336364, "o", "t"] +[59.74071, "o", "*"] +[59.880803, "o", ")"] +[60.704722, "o", "$"] +[60.98509, "o", "i"] +[61.094169, "o", "d"] +[61.321053, "o", "t"] +[61.480931, "o", "r"] +[62.248357, "o", "\r\n"] +[62.253459, "o", "$1 = 13933448952811676512\r\n"] 
+[62.253518, "o", "(gdb) "] +[63.977116, "o", "print *(uint64_t*)$idtr"] +[64.304911, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[64.456992, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[65.080291, "o", "/ *(uint64_t*)$idtr\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[65.416962, "o", "\b\u001b[1P *(uint64_t*)$idtr\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[65.497236, "o", "\u001b[C *(uint64_t*)$idtr\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[65.609571, "o", "/ *(uint64_t*)$idtr\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[65.889038, "o", "x *(uint64_t*)$idtr\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[65.993566, "o", "\r\n"] +[65.993861, "o", "$2 = 0xc15d8e000060e360\r\n(gdb) "] +[68.328319, "o", "#"] +[68.608617, "o", " "] +[71.984353, "o", "t"] +[72.048655, "o", "h"] +[72.144372, "o", "e"] +[72.281093, "o", " "] +[73.216663, "o", "t"] +[73.240818, "o", "o"] +[73.368648, "o", "p"] +[73.464534, "o", " "] +[73.576581, "o", "1"] +[73.85665, "o", "6"] +[74.11258, "o", " "] +[74.26418, "o", "b"] +[74.408592, "o", "i"] +[74.488681, "o", "t"] +[74.736972, "o", "s"] +[74.785856, "o", " "] +[75.320694, "o", "|"] +[75.408819, "o", " "] +[75.928337, "o", "l"] +[76.105728, "o", "o"] +[76.169532, "o", "w"] +[76.257743, "o", "e"] +[76.329013, "o", "r"] +[76.432359, "o", " "] +[76.632745, "o", "1"] +[76.896585, "o", "6"] +[77.265121, "o", " "] +[77.456494, "o", "b"] +[77.55218, "o", "i"] +[77.640654, "o", "t"] +[78.424438, "o", "s"] +[78.576779, "o", " "] +[79.9295, "o", "y"] +[80.207199, "o", "i"] +[81.156076, "o", "e"] +[81.342966, "o", "l"] +[81.464422, "o", "d"] +[81.544827, "o", " "] +[81.712818, "o", "t"] +[81.777406, "o", "h"] +[81.896729, "o", "e"] 
+[82.016527, "o", " "] +[83.160488, "o", "h"] +[83.234273, "o", "a"] +[83.373947, "o", "n"] +[83.457305, "o", "d"] +[83.560818, "o", "l"] +[83.648667, "o", "e"] +[83.736063, "o", "r"] +[83.832315, "o", " "] +[83.904474, "o", "a"] +[84.01645, "o", "d"] +[84.17622, "o", "d"] +[84.441953, "o", "r"] +[84.515239, "o", "e"] +[84.648335, "o", "s"] +[84.793111, "o", "s"] +[84.889616, "o", "\r\n"] +[84.889676, "o", "(gdb) "] +[87.657834, "o", "p"] +[88.328467, "o", "r"] +[88.457638, "o", "i"] +[88.528292, "o", "n"] +[88.592033, "o", "t"] +[89.216915, "o", " "] +[92.096493, "o", "("] +[92.318607, "o", "v"] +[92.401482, "o", "o"] +[92.466573, "o", "i"] +[92.552621, "o", "d"] +[92.71269, "o", " "] +[93.041067, "o", "*"] +[93.136553, "o", ")"] +[100.020065, "o", "0xc15d"] +[102.915226, "o", "e360"] +[103.87249, "o", "\r\n"] +[103.872714, "o", "$3 = (void *) \u001b[34m0xc15de360\u001b[m <\u001b[33masm_exc_divide_error\u001b[m>\r\n(gdb) "] +[108.216616, "o", "#"] +[109.256576, "o", " "] +[110.240848, "o", "l"] +[110.461673, "o", "o"] +[110.584304, "o", "o"] +[110.624509, "o", "k"] +[110.735996, "o", "s"] +[110.840183, "o", " "] +[111.256571, "o", "l"] +[111.484022, "o", "i"] +[111.992971, "o", "k"] +[112.192703, "o", "e"] +[112.333386, "o", " "] +[112.425705, "o", "t"] +[112.499041, "o", "h"] +[112.600559, "o", "e"] +[112.718099, "o", " "] +[112.88867, "o", "h"] +[112.952432, "o", "a"] +[113.056426, "o", "n"] +[113.690225, "o", "d"] +[113.936437, "o", "l"] +[114.032295, "o", "e"] +[114.112253, "o", "r"] +[114.215985, "o", " "] +[114.377083, "o", "f"] +[114.452839, "o", "o"] +[114.592553, "o", "r"] +[114.66679, "o", " "] +[114.809895, "o", "t"] +[114.903547, "o", "h"] +[114.976958, "o", "e"] +[115.112512, "o", " "] +[116.177138, "o", "d"] +[116.32823, "o", "i"] +[116.424191, "o", "v"] +[116.496866, "o", "i"] +[116.600329, "o", "s"] +[116.712739, "o", "i"] +[116.752259, "o", "o"] +[116.937502, "o", "n"] +[117.0166, "o", " "] +[117.281132, "o", "b"] +[117.360604, "o", "y"] 
+[117.47323, "o", " "] +[117.704485, "o", "z"] +[117.904681, "o", "e"] +[117.989636, "o", "r"] +[118.096365, "o", "o"] +[118.232856, "o", " "] +[118.720624, "o", "e"] +[118.832219, "o", "x"] +[119.064653, "o", "c"] +[119.150155, "o", "e"] +[119.337024, "o", "p"] +[119.664612, "o", "t"] +[119.760425, "o", "i"] +[119.800127, "o", "o"] +[119.984793, "o", "n"] +[120.26515, "o", "\r\n(gdb) "] +[120.728331, "o", "#"] +[120.920995, "o", " "] +[121.064451, "o", "l"] +[121.160676, "o", "e"] +[121.536815, "o", "t"] +[121.786124, "o", "s"] +[121.896173, "o", " "] +[122.080241, "o", "l"] +[122.240363, "o", "o"] +[122.376315, "o", "o"] +[122.408674, "o", "k"] +[122.624537, "o", " "] +[122.773709, "o", "a"] +[122.945429, "o", "t"] +[123.023878, "o", " "] +[123.953626, "o", "ot"] +[124.080277, "o", "h"] +[124.200363, "o", "er"] +[124.728617, "o", " "] +[125.002233, "o", "h"] +[125.064643, "o", "a"] +[125.184089, "o", "n"] +[125.256259, "o", "d"] +[125.392308, "o", "l"] +[125.488306, "o", "e"] +[125.552441, "o", "r"] +[125.741692, "o", "s"] +[126.768744, "o", "\r\n"] +[126.768865, "o", "(gdb) "] +[127.240409, "o", "#"] +[127.480474, "o", " "] +[127.784631, "o", "b"] +[127.944921, "o", "u"] +[128.040904, "o", "t"] +[128.225533, "o", " "] +[130.732997, "o", "l"] +[130.808467, "o", "e"] +[130.968259, "o", "t"] +[131.176344, "o", "s"] +[131.32975, "o", " "] +[131.488296, "o", "d"] +[131.5519, "o", "e"] +[131.689568, "o", "f"] +[131.785467, "o", "i"] +[131.837318, "o", "n"] +[131.904581, "o", "e"] +[132.024328, "o", " "] +[132.160185, "o", "a"] +[132.271963, "o", " "] +[133.464111, "o", "g"] +[133.584141, "o", "d"] +[133.696892, "o", "b"] +[133.79681, "o", " "] +[134.217167, "o", "m"] +[134.304136, "o", "a"] +[134.408321, "o", "c"] +[134.656512, "o", "r"] +[134.721337, "o", "o"] +[134.822577, "o", " "] +[134.991044, "o", "t"] +[135.071573, "o", "o"] +[135.111997, "o", " "] +[135.272574, "o", "m"] +[135.362308, "o", "a"] +[135.456211, "o", "k"] +[135.544994, "o", "e"] +[135.616461, "o", 
" "] +[135.727963, "o", "t"] +[135.792452, "o", "h"] +[135.857291, "o", "i"] +[136.000758, "o", "n"] +[136.069751, "o", "g"] +[136.257128, "o", "s"] +[136.328766, "o", " "] +[136.510165, "o", "e"] +[136.633035, "o", "a"] +[136.832209, "o", "s"] +[136.960217, "o", "i"] +[137.048799, "o", "e"] +[137.150086, "o", "r"] +[138.744006, "o", "\r\n(gdb) "] +[139.568515, "o", "d"] +[139.653984, "o", "e"] +[139.784679, "o", "f"] +[139.872147, "o", "i"] +[139.912416, "o", "n"] +[139.993159, "o", "e"] +[140.104365, "o", " "] +[140.296306, "o", "i"] +[140.363786, "o", "d"] +[140.665088, "o", "t"] +[140.92836, "o", "_"] +[141.136493, "o", "e"] +[141.360228, "o", "n"] +[141.448491, "o", "t"] +[141.624792, "o", "r"] +[141.713426, "o", "y"] +[142.721123, "o", "\r\n"] +[142.721216, "o", "Type commands for definition of \"idt_entry\".\r\nEnd with a line saying just \"end\".\r\n>"] +[168.576695, "o", "s"] +[168.624243, "o", "e"] +[168.737408, "o", "t"] +[168.824664, "o", " "] +[169.336803, "o", "$"] +[169.680096, "o", "t"] +[169.88103, "o", "m"] +[170.352694, "o", "p"] +[170.935949, "o", "="] +[173.920126, "o", "\b\u001b[K"] +[174.064076, "o", " "] +[174.216635, "o", "="] +[174.303854, "o", " "] +[177.336069, "o", "("] +[178.712525, "o", "$"] +[179.374062, "o", "d"] +[179.720264, "o", "i"] +[180.160492, "o", "\b\u001b[K"] +[180.289725, "o", "\b\u001b[K"] +[180.43246, "o", "\b\u001b[K"] +[181.432386, "o", "\b\u001b[K"] +[181.864182, "o", "*"] +[182.041353, "o", "("] +[182.320662, "o", "u"] +[183.224845, "o", "i"] +[183.288156, "o", "n"] +[183.424563, "o", "t"] +[183.920426, "o", "6"] +[184.016392, "o", "4"] +[184.22467, "o", "_"] +[184.505022, "o", "t"] +[185.017063, "o", "*"] +[185.520732, "o", ")"] +[186.288322, "o", "("] +[186.812369, "o", "$"] +[187.415985, "o", "i"] +[187.600868, "o", "d"] +[188.142718, "o", "t"] +[188.312029, "o", "r"] +[188.616543, "o", " "] +[189.064819, "o", "+"] +[189.264592, "o", " "] +[189.680228, "o", "8"] +[189.848238, "o", " "] +[190.617403, "o", "*"] 
+[190.760083, "o", " "] +[191.959835, "o", "%"] +[192.576085, "o", "\b\u001b[K"] +[192.844777, "o", "$"] +[193.048452, "o", "a"] +[193.159941, "o", "r"] +[193.376555, "o", "g"] +[193.982847, "o", "0"] +[194.431687, "o", ")"] +[197.999312, "o", "\r\n"] +[197.999456, "o", ">"] +[198.449303, "o", "p"] +[198.665073, "o", "r"] +[198.735942, "o", "i"] +[198.808033, "o", "n"] +[198.888752, "o", "t"] +[199.016048, "o", " "] +[199.417075, "o", "("] +[199.736133, "o", "v"] +[199.848015, "o", "o"] +[199.944772, "o", "i"] +[200.024132, "o", "d"] +[200.286196, "o", " "] +[200.704083, "o", ")"] +[201.349769, "o", "\b\u001b[K"] +[201.704144, "o", "*"] +[201.872195, "o", ")"] +[203.840273, "o", "("] +[205.772822, "o", "("] +[208.496616, "o", "$"] +[209.18405, "o", "t"] +[209.256793, "o", "m"] +[209.36482, "o", "p"] +[209.775923, "o", ">"] +[209.912509, "o", ">"] +[210.839657, "o", "4"] +[210.888609, "o", "8"] +[211.215805, "o", "<"] +[211.336883, "o", "<"] +[211.61268, "o", "1"] +[211.976222, "o", "6"] +[212.74975, "o", ")"] +[213.984241, "o", "|"] +[214.624311, "o", "("] +[215.122175, "o", "T"] +[215.780469, "o", "\b\u001b[K"] +[215.993115, "o", "$"] +[216.224093, "o", "t"] +[216.34352, "o", "m"] +[216.424, "o", "p"] +[217.560837, "o", "&"] +[218.328697, "o", "0"] +[218.632066, "o", "x"] +[218.968103, "o", "f"] +[219.150939, "o", "f"] +[219.727887, "o", "f"] +[219.863865, "o", "f"] +[220.912413, "o", ")"] +[222.079895, "o", ")"] +[223.568547, "o", "\r\n"] +[223.568676, "o", ">"] +[224.094006, "o", "e"] +[224.223863, "o", "n"] +[224.287703, "o", "d"] +[224.744944, "o", "\r\n"] +[224.745116, "o", "(gdb) "] +[225.951864, "o", "i"] +[226.096427, "o", "d"] +[226.680474, "o", "\b\u001b[K"] +[226.872108, "o", "t"] +[228.156402, "o", "\b\u001b[K"] +[228.205096, "o", "d"] +[228.440636, "o", "t"] +[228.631859, "o", "_"] +[229.039821, "o", "e"] +[229.167518, "o", "n"] +[229.256696, "o", "t"] +[229.433937, "o", "r"] +[229.505089, "o", "y"] +[229.631679, "o", " "] +[229.840572, "o", "0"] 
+[230.040276, "o", "\r\n"] +[238.488909, "o", "$4 = (void *) \u001b[34m0xc15de360\u001b[m <\u001b[33masm_exc_divide_error\u001b[m>\r\n(gdb) "] +[242.8967, "o", "s"] +[243.00697, "o", "e"] +[243.128131, "o", "t"] +[243.183821, "o", " "] +[243.402228, "o", "$"] +[243.592398, "o", "i"] +[243.967633, "o", "="] +[244.505775, "o", "0"] +[245.649837, "o", "\r\n"] +[245.667678, "o", "(gdb) "] +[246.494615, "o", "i"] +[246.951918, "o", "d"] +[247.159916, "o", "t"] +[247.288617, "o", "_"] +[247.44043, "o", "e"] +[247.606119, "o", "n"] +[247.677569, "o", "t"] +[247.824446, "o", "r"] +[247.919894, "o", "y"] +[248.064502, "o", " "] +[248.368428, "o", "$i"] +[248.812484, "o", "+"] +[248.95182, "o", "+"] +[249.296086, "o", "\r\n"] +[251.440221, "o", "$5 = (void *) \u001b[34m0xc15de360\u001b[m <\u001b[33masm_exc_divide_error\u001b[m>\r\n(gdb) "] +[252.647928, "o", "\r\n"] +[252.648357, "o", "$6 = (void *) \u001b[34m0xc15de460\u001b[m <\u001b[33masm_exc_debug\u001b[m>\r\n(gdb) "] +[253.671712, "o", "\r\n"] +[253.672097, "o", "$7 = (void *) \u001b[34m0xc15dec28\u001b[m <\u001b[33masm_exc_nmi\u001b[m>\r\n(gdb) "] +[254.280864, "o", "\r\n"] +[254.281363, "o", "$8 = (void *) \u001b[34m0xc15de440\u001b[m <\u001b[33masm_exc_int3\u001b[m>\r\n(gdb) "] +[254.976139, "o", "\r\n"] +[254.976557, "o", "$9 = (void *) \u001b[34m0xc15de370\u001b[m <\u001b[33masm_exc_overflow\u001b[m>\r\n(gdb) "] +[255.880408, "o", "\r\n"] +[255.880837, "o", "$10 = (void *) \u001b[34m0xc15de380\u001b[m <\u001b[33masm_exc_bounds\u001b[m>\r\n(gdb) "] +[256.624516, "o", "\r\n"] +[256.625069, "o", "$11 = (void *) \u001b[34m0xc15de430\u001b[m <\u001b[33masm_exc_invalid_op\u001b[m>\r\n(gdb) "] +[259.776172, "o", "#"] +[259.983837, "o", " "] +[260.136287, "o", "n"] +[260.210913, "o", "o"] +[260.615863, "o", "w"] +[260.791336, "o", " "] +[261.016063, "o", "l"] +[261.088278, "o", "e"] +[261.231738, "o", "t"] +[261.455678, "o", "s"] +[261.535942, "o", " "] +[264.927751, "o", "d"] +[265.007746, "o", "i"] +[265.080717, "o", 
"s"] +[265.76765, "o", "a"] +[265.920288, "o", "s"] +[266.056377, "o", "s"] +[266.663988, "o", "e"] +[266.744341, "o", "m"] +[266.91189, "o", "b"] +[267.063396, "o", "l"] +[267.136112, "o", "e"] +[267.231659, "o", " "] +[271.856077, "o", "o"] +[272.111984, "o", "n"] +[272.262555, "o", "e"] +[272.334059, "o", " "] +[272.495286, "o", "o"] +[272.567329, "o", "f"] +[272.655262, "o", " "] +[272.78918, "o", "t"] +[272.849964, "o", "h"] +[272.944092, "o", "e"] +[273.016315, "o", " "] +[273.112499, "o", "h"] +[273.184169, "o", "a"] +[273.301118, "o", "n"] +[273.399638, "o", "d"] +[273.495596, "o", "l"] +[273.600002, "o", "e"] +[273.672038, "o", "r"] +[273.871971, "o", "s"] +[274.591341, "o", "\r\n(gdb) "] +[281.693904, "o", "d"] +[281.813581, "o", "i"] +[282.81517, "o", "s"] +[283.118047, "o", "a"] +[283.2217, "o", "s"] +[284.374128, "o", "semble "] +[293.053957, "o", "a"] +[293.157762, "o", "s"] +[293.229719, "o", "m"] +[293.708297, "o", "E"] +[294.133992, "o", "\b\u001b[K"] +[294.448654, "o", "_"] +[294.706454, "o", "e"] +[294.829897, "o", "x"] +[295.462678, "o", "c"] +[296.67153, "o", "\u0007_"] +[297.946193, "o", "d"] +[298.135012, "o", "i"] +[298.685772, "o", "\b\u001b[K"] +[299.178835, "o", "i"] +[299.621804, "o", "v"] +[299.733826, "o", "i"] +[299.902348, "o", "de_error "] +[300.549988, "o", "\r\n"] +[300.567393, "o", "Dump of assembler code for function asm_exc_divide_error:\r\n"] +[300.56769, "o", " \u001b[34m0xc15de360\u001b[m <+0>:\tlea 0x0(%esi),%esi\r\n \u001b[34m0xc15de363\u001b[m <+3>:\tcld \r\n \u001b[34m0xc15de364\u001b[m <+4>:\tpush $0x0\r\n \u001b[34m0xc15de366\u001b[m <+6>:\tpush $0xc15d1ff0\r\n"] +[300.567847, "o", " \u001b[34m0xc15de36b\u001b[m <+11>:\tjmp \u001b[34m0xc15dea1f\u001b[m <\u001b[33mhandle_exception\u001b[m>\r\nEnd of assembler dump.\r\n(gdb) "] +[301.973744, "o", "#"] +[302.290429, "o", " "] +[306.773991, "o", "t"] +[306.89402, "o", "h"] +[306.989409, "o", "e"] +[307.078564, "o", " "] +[307.205506, "o", "h"] +[307.269547, "o", "a"] 
+[307.381896, "o", "n"] +[307.462694, "o", "d"] +[307.606051, "o", "l"] +[307.683098, "o", "e"] +[307.782006, "o", "r"] +[307.90709, "o", " "] +[308.102037, "o", "i"] +[308.205644, "o", "s"] +[308.309779, "o", " "] +[308.649061, "o", "s"] +[308.719819, "o", "a"] +[308.909582, "o", "v"] +[309.045065, "o", "i"] +[309.111041, "o", "n"] +[309.181535, "o", "g"] +[309.357861, "o", " "] +[309.791576, "o", "0"] +[310.205523, "o", "\b\u001b[K"] +[310.317771, "o", "z"] +[310.52606, "o", "e"] +[310.645389, "o", "r"] +[310.709449, "o", "o"] +[311.925803, "o", " "] +[313.89363, "o", "\r\u001b[K(gdb) # the handler is saving zero "] +[316.414294, "o", "a"] +[316.646282, "o", "n"] +[316.757604, "o", "d"] +[317.310478, "o", " "] +[318.758069, "o", "a"] +[318.854419, "o", " "] +[320.037966, "o", "k"] +[320.158956, "o", "e"] +[320.222516, "o", "r"] +[320.27824, "o", "n"] +[320.397617, "o", "e"] +[320.478095, "o", "l"] +[320.573709, "o", " "] +[320.653434, "o", "a"] +[320.765976, "o", "d"] +[320.931418, "o", "d"] +[321.115711, "o", "r"] +[321.16469, "o", "e"] +[321.317561, "o", "s"] +[321.469813, "o", "s"] +[321.533551, "o", " "] +[321.637537, "o", "o"] +[321.77303, "o", "n"] +[322.051753, "o", " "] +[322.229659, "o", "t"] +[322.334768, "o", "h"] +[322.397494, "o", "e"] +[322.493585, "o", " "] +[322.581569, "o", "s"] +[322.726173, "o", "t"] +[322.789718, "o", "a"] +[322.966867, "o", "c"] +[323.01384, "o", "k"] +[323.861459, "o", "\r\n(gdb) "] +[324.400657, "o", "#"] +[324.57396, "o", " "] +[324.718055, "o", "t"] +[324.837389, "o", "h"] +[324.925552, "o", "e"] +[325.053448, "o", "n"] +[325.131599, "o", " "] +[325.541878, "o", "c"] +[325.656711, "o", "a"] +[325.789362, "o", "l"] +[325.917952, "o", "l"] +[326.215003, "o", "s"] +[326.405569, "o", " "] +[327.862046, "o", "a"] +[328.005901, "o", " "] +[328.509434, "o", "g"] +[328.589852, "o", "e"] +[328.718961, "o", "n"] +[328.794376, "o", "e"] +[329.046122, "o", "r"] +[329.197525, "o", "i"] +[329.413451, "o", "c"] +[329.589576, "o", " "] 
+[334.125557, "o", "e"] +[334.229684, "o", "x"] +[334.294128, "o", "c"] +[334.397658, "o", "c"] +[334.49939, "o", "e"] +[334.582127, "o", "p"] +[334.711478, "o", "t"] +[334.796324, "o", "i"] +[334.829615, "o", "o"] +[335.309853, "o", "\b\u001b[K"] +[337.093887, "o", "o"] +[337.253827, "o", "n"] +[337.758199, "o", "\b"] +[338.268806, "o", "\b"] +[338.299872, "o", "\b"] +[338.329696, "o", "\b"] +[338.52618, "o", "\b"] +[338.693918, "o", "\b"] +[338.898897, "o", "\b"] +[338.940108, "o", "\u001b[1Peption\b\b\b\b\b\b"] +[339.31746, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[339.987467, "o", " "] +[340.501724, "o", "h"] +[340.557371, "o", "a"] +[340.669201, "o", "n"] +[340.781539, "o", "d"] +[340.893346, "o", "l"] +[340.986467, "o", "e"] +[341.107449, "o", "r"] +[341.189818, "o", " "] +[341.310107, "o", "f"] +[341.404177, "o", "u"] +[341.477608, "o", "n"] +[341.589586, "o", "c"] +[341.805591, "o", "t"] +[341.877429, "o", "i"] +[341.917582, "o", "o"] +[342.116466, "o", "n"] +[342.669981, "o", "\r\n(gdb) "] +[344.453707, "o", "p"] +[344.573486, "o", "r"] +[344.651746, "o", "i"] +[344.710394, "o", "n"] +[344.773933, "o", "t"] +[344.853496, "o", " "] +[345.133258, "o", "("] +[345.301217, "o", "v"] +[345.423574, "o", "o"] +[345.509734, "o", "i"] +[345.611679, "o", "d"] +[346.432047, "o", "*"] +[346.853663, "o", "\b\u001b[K"] +[347.541327, "o", "*"] +[347.72567, "o", ")"] +[351.837485, "o", "0xc15d1f"] +[351.838034, "o", "f0"] +[352.917933, "o", "\r\n"] +[352.923702, "o", "$1 = (void *) \u001b[34m0xc15d1ff0\u001b[m <\u001b[33mexc_divide_error\u001b[m>\r\n(gdb) "] +[357.542556, "o", "#"] +[357.837506, "o", " "] +[361.282757, "o", "t"] +[361.413551, "o", "h"] +[361.525554, "o", "is"] +[361.653148, "o", " "] +[362.214023, "o", "l"] +[362.389469, "o", "o"] +[362.541643, "o", "o"] +[362.877329, "o", "k"] +[363.062173, "o", "s"] +[363.118787, "o", " "] +[363.269731, "o", "l"] +[363.453393, "o", "i"] +[363.621546, "o", "k"] +[363.709514, "o", "e"] +[363.862147, "o", " 
"] +[364.222054, "o", "t"] +[364.349865, "o", "h"] +[364.517649, "o", "e"] +[364.613482, "o", " "] +[365.373866, "o", "\b\u001b[K"] +[365.523605, "o", "\b\u001b[K"] +[365.653717, "o", "\b\u001b[K"] +[365.797269, "o", "\b\u001b[K"] +[366.093498, "o", "a"] +[366.2068, "o", " "] +[366.345557, "o", "p"] +[366.427604, "o", "o"] +[366.62164, "o", "i"] +[366.685247, "o", "n"] +[366.749103, "o", "t"] +[366.830172, "o", "e"] +[366.957418, "o", "r"] +[367.093461, "o", " "] +[367.222757, "o", "t"] +[367.270164, "o", "o"] +[367.350376, "o", " "] +[367.453392, "o", "t"] +[367.549486, "o", "h"] +[367.650301, "o", "e"] +[367.708818, "o", " "] +[367.989708, "o", "a"] +[368.125641, "o", "c"] +[368.378932, "o", "t"] +[368.468498, "o", "u"] +[368.590227, "o", "a"] +[368.710298, "o", "l"] +[368.813341, "o", " "] +[369.021547, "o", "e"] +[373.138639, "o", "c"] +[373.285424, "o", "a"] +[373.493441, "o", "p"] +[373.701029, "o", "t"] +[373.861883, "o", "i"] +[373.909667, "o", "o"] +[374.132304, "o", "\b\u001b[K"] +[374.261891, "o", "\b\u001b[K"] +[374.414046, "o", "\b\u001b[K"] +[374.565314, "o", "\b\u001b[K"] +[374.709345, "o", "\b\u001b[K"] +[374.853559, "o", "\b\u001b[K"] +[374.886, "o", "x"] +[375.125274, "o", "c"] +[375.203406, "o", "e"] +[375.277276, "o", "p"] +[375.42167, "o", "t"] +[375.510506, "o", "i"] +[375.533335, "o", "o"] +[375.701534, "o", "n"] +[375.789484, "o", " "] +[375.965816, "o", "h"] +[375.97305, "o", "a"] +[376.16194, "o", "n"] +[376.453783, "o", "d"] +[376.589277, "o", "l"] +[376.701185, "o", "e"] +[376.765614, "o", "r"] +[376.917157, "o", " "] +[378.366234, "o", "\r\n"] +[378.366312, "o", "(gdb) "] +[378.696508, "o", "#"] +[378.819592, "o", " "] +[378.981518, "o", "i"] +[379.141542, "o", "s"] +[379.261122, "o", " "] +[379.406101, "o", "p"] +[379.469324, "o", "a"] +[379.557497, "o", "s"] +[379.725858, "o", "s"] +[379.816754, "o", "e"] +[379.925361, "o", "d"] +[380.021135, "o", " "] +[380.173176, "o", "t"] +[380.229575, "o", "o"] +[380.333106, "o", " "] 
+[380.485699, "o", "t"] +[380.605346, "o", "h"] +[380.734993, "o", "e"] +[380.836656, "o", " "] +[382.373895, "o", "g"] +[382.445188, "o", "e"] +[382.525518, "o", "n"] +[382.621654, "o", "e"] +[382.787196, "o", "r"] +[382.978124, "o", "i"] +[383.142264, "o", "c"] +[383.317634, "o", " "] +[383.819866, "o", "h"] +[383.878201, "o", "a"] +[383.99007, "o", "n"] +[384.089685, "o", "d"] +[384.197318, "o", "l"] +[384.277425, "o", "e"] +[384.461334, "o", "_"] +[384.661346, "o", "e"] +[384.79798, "o", "x"] +[385.006099, "o", "c"] +[385.090299, "o", "e"] +[385.170675, "o", "p"] +[385.310365, "o", "t"] +[385.389772, "o", "i"] +[385.421608, "o", "o"] +[385.581386, "o", "n"] +[386.20595, "o", "\r\n"] +[386.206176, "o", "(gdb) "] +[389.230197, "o", "#"] +[390.26918, "o", " "] +[391.325411, "o", "l"] +[391.42176, "o", "e"] +[391.581594, "o", "t"] +[391.774324, "o", "s"] +[391.886397, "o", " "] +[392.301564, "o", "s"] +[392.421411, "o", "e"] +[392.541767, "o", "t"] +[392.623009, "o", " "] +[392.706636, "o", "a"] +[392.840467, "o", " "] +[393.090357, "o", "b"] +[393.453217, "o", "r"] +[393.541455, "o", "e"] +[393.605239, "o", "a"] +[394.046616, "o", "k"] +[394.291075, "o", "p"] +[394.380916, "o", "o"] +[394.621516, "o", "n"] +[394.725214, "o", "t"] +[395.107038, "o", "\b\u001b[K"] +[395.413324, "o", "\b\u001b[K"] +[395.717268, "o", "i"] +[395.797318, "o", "n"] +[395.949174, "o", "t"] +[396.005249, "o", " "] +[396.160391, "o", "t"] +[396.222933, "o", "o"] +[396.301863, "o", " "] +[396.485547, "o", "h"] +[396.525186, "o", "a"] +[396.661547, "o", "n"] +[396.773364, "o", "d"] +[396.853597, "o", "l"] +[396.989123, "o", "e"] +[397.290232, "o", "_"] +[397.493521, "o", "e"] +[397.629719, "o", "x"] +[397.837301, "o", "c"] +[397.901283, "o", "e"] +[397.981223, "o", "p"] +[398.141235, "o", "t"] +[398.262293, "o", "o"] +[398.309802, "o", "p"] +[398.933271, "o", "\b\u001b[K"] +[399.005854, "o", "\b\u001b[K"] +[399.342381, "o", "i"] +[399.390296, "o", "o"] +[399.637201, "o", "n"] +[400.021147, 
"o", " "] +[400.134629, "o", "a"] +[400.253125, "o", "n"] +[400.333322, "o", "d"] +[400.437249, "o", " "] +[400.549161, "o", "s"] +[400.686474, "o", "e"] +[400.814467, "o", "e"] +[400.933556, "o", " "] +[402.002187, "o", "w"] +[402.205377, "o", "h"] +[402.229328, "o", "a"] +[402.397324, "o", "t"] +[402.4851, "o", " "] +[402.621307, "o", "w"] +[402.725189, "o", "e"] +[402.805449, "o", " "] +[402.949896, "o", "c"] +[403.035235, "o", "a"] +[403.253098, "o", "t"] +[403.437365, "o", "c"] +[403.509643, "o", "h"] +[403.973799, "o", "\r\n(gdb) "] +[404.677037, "o", "b"] +[404.765978, "o", "r"] +[404.837523, "o", "e"] +[404.89348, "o", "a"] +[404.965381, "o", "k"] +[405.069983, "o", " "] +[405.469329, "o", "h"] +[405.573246, "o", "a"] +[405.693381, "o", "n"] +[405.789649, "o", "d"] +[405.870641, "o", "l"] +[406.413246, "o", "\u0007e"] +[406.509294, "o", "e"] +[407.118199, "o", "\b\u001b[K"] +[407.3744, "o", "_"] +[407.550122, "o", "e"] +[407.725281, "o", "x"] +[407.938177, "o", "\u0007"] +[408.415128, "o", "c"] +[408.520749, "o", "\u0007eption"] +[408.893296, "o", "\r\n"] +[408.924833, "o", "Breakpoint 1 at \u001b[34m0xc15dea1f\u001b[m: file \u001b[32march/x86/entry/entry_32.S\u001b[m, line 1154.\r\n(gdb) "] +[416.67716, "o", "c"] +[417.078529, "o", "\r\nContinuing.\r\n"] +[417.083919, "o", "\r\n"] +[417.084103, "o", "Breakpoint 1, \u001b[33mhandle_exception\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1154\r\n"] +[417.084192, "o", "1154\t\tSAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1\r\n(gdb) "] +[429.509889, "o", "#"] +[429.68491, "o", " "] +[429.877508, "o", "i"] +[429.981148, "o", "f"] +[430.109307, "o", " "] +[430.221386, "o", "w"] +[430.309864, "o", "e"] +[430.590443, "o", " "] +[430.809819, "o", "l"] +[430.989816, "o", "o"] +[431.117, "o", "o"] +[431.189041, "o", "k"] +[431.277218, "o", " "] +[431.437363, "o", "a"] +[431.661354, "o", "t"] +[431.725226, "o", " "] +[431.862491, "o", "t"] +[431.979484, "o", "h"] +[432.045404, "o", "e"] +[432.134089, 
"o", " "] +[432.277058, "o", "s"] +[432.421149, "o", "t"] +[432.469089, "o", "a"] +[432.667918, "o", "c"] +[432.732989, "o", "k"] +[432.893494, "o", " "] +[433.333128, "o", "w"] +[433.412995, "o", "e"] +[433.500507, "o", " "] +[433.653242, "o", "s"] +[433.757095, "o", "h"] +[433.829678, "o", "o"] +[433.948148, "o", "u"] +[434.073821, "o", "l"] +[434.132601, "o", "d"] +[434.22928, "o", " "] +[434.34112, "o", "f"] +[434.397286, "o", "i"] +[434.492998, "o", "n"] +[434.566083, "o", "d"] +[434.677046, "o", " "] +[434.821839, "o", "t"] +[434.909696, "o", "h"] +[434.995523, "o", "e"] +[435.090817, "o", " "] +[437.581687, "o", "a"] +[437.781198, "o", "c"] +[438.013647, "o", "t"] +[438.077543, "o", "u"] +[438.206204, "o", "a"] +[438.294424, "o", "l"] +[438.416002, "o", " "] +[439.485411, "o", "e"] +[439.630758, "o", "x"] +[439.829008, "o", "c"] +[439.916901, "o", "e"] +[439.964921, "o", "p"] +[440.140749, "o", "t"] +[440.215078, "o", "i"] +[440.253158, "o", "o"] +[440.465997, "o", "n"] +[440.573062, "o", " "] +[441.141509, "o", "h"] +[441.173285, "o", "a"] +[441.333677, "o", "n"] +[441.405478, "o", "d"] +[441.53343, "o", "l"] +[441.605845, "o", "e"] +[441.67794, "o", "r"] +[444.222754, "o", "\r\n"] +[444.222812, "o", "(gdb) "] +[445.323324, "o", "p"] +[445.469488, "o", "r"] +[445.541673, "o", "i"] +[445.605066, "o", "n"] +[445.693078, "o", "t"] +[445.838045, "o", " "] +[449.204742, "o", "("] +[449.662622, "o", "v"] +[449.701787, "o", "o"] +[449.789464, "o", "i"] +[449.861186, "o", "d"] +[449.955706, "o", " "] +[450.269467, "o", "*"] +[450.429896, "o", ")"] +[452.877512, "o", "("] +[453.661196, "o", "\b\u001b[K"] +[454.00543, "o", "(*"] +[454.210726, "o", "("] +[454.700739, "o", "i"] +[455.037441, "o", "\b\u001b[K"] +[455.25302, "o", "u"] +[455.43756, "o", "i"] +[455.510815, "o", "n"] +[455.628939, "o", "t"] +[456.820983, "o", "3"] +[456.909062, "o", "2"] +[457.044863, "o", "_"] +[457.213162, "o", "t"] +[457.685637, "o", " "] +[458.29388, "o", "*"] +[458.455775, "o", ")"] 
+[459.281591, "o", "e"] +[459.837464, "o", "\b\u001b[K"] +[460.301045, "o", "$"] +[460.868861, "o", "e"] +[461.03713, "o", "s"] +[461.101321, "o", "p"] +[461.786512, "o", ")"] +[464.477246, "o", "\r\n"] +[472.463976, "o", "$2 = (void *) \u001b[34m0xc15d3840\u001b[m <\u001b[33msysvec_apic_timer_interrupt\u001b[m>\r\n(gdb) "] +[483.413856, "o", "#"] +[483.613547, "o", " "] +[484.941064, "o", "t"] +[485.036894, "o", "h"] +[485.101573, "o", "i"] +[485.277486, "o", "s"] +[485.365346, "o", " "] +[485.782102, "o", "l"] +[485.989418, "o", "o"] +[486.117103, "o", "o"] +[486.197077, "o", "k"] +[486.333443, "o", "s"] +[486.413088, "o", " "] +[486.581136, "o", "l"] +[486.733891, "o", "i"] +[486.912819, "o", "k"] +[487.002561, "o", "e"] +[487.095043, "o", " "] +[487.261133, "o", "t"] +[487.333274, "o", "h"] +[487.46896, "o", "e"] +[487.550243, "o", " "] +[495.172835, "o", "t"] +[495.373523, "o", "i"] +[495.437104, "o", "m"] +[495.540612, "o", "e"] +[495.604806, "o", "r"] +[495.74046, "o", " "] +[496.74974, "o", "i"] +[496.813502, "o", "n"] +[496.93292, "o", "t"] +[497.037013, "o", "e"] +[497.229265, "o", "r"] +[497.372827, "o", "r"] +[497.500693, "o", "u"] +[497.556563, "o", "p"] +[497.673852, "o", "t"] +[498.109885, "o", "\r\n"] +[498.110099, "o", "(gdb) "] +[522.221171, "o", "#"] +[522.444874, "o", " "] +[522.684901, "o", "n"] +[522.91456, "o", "e"] +[523.316756, "o", "\b\u001b[K"] +[523.428746, "o", "\b\u001b[K"] +[523.500892, "o", "t"] +[523.644472, "o", "h"] +[523.757109, "o", "e"] +[523.955806, "o", " "] +[524.099169, "o", "n"] +[524.155454, "o", "e"] +[524.22103, "o", "x"] +[524.508979, "o", "t"] +[524.604798, "o", " "] +[525.057219, "o", "i"] +[525.162621, "o", "t"] +[525.389208, "o", "e"] +[525.469892, "o", "m"] +[525.55658, "o", " "] +[525.668816, "o", "o"] +[525.989001, "o", "n"] +[526.192028, "o", " "] +[526.28711, "o", "t"] +[526.422465, "o", "h"] +[526.493339, "o", "e"] +[526.589426, "o", " "] +[526.717587, "o", "s"] +[527.157628, "o", "t"] +[527.290422, "o", "a"] 
+[527.525162, "o", "c"] +[527.597243, "o", "k"] +[527.724964, "o", " "] +[527.965348, "o", "s"] +[528.029266, "o", "h"] +[528.101143, "o", "o"] +[528.21352, "o", "u"] +[528.373131, "o", "l"] +[528.509865, "o", "d"] +[528.53395, "o", " "] +[528.733472, "o", "b"] +[528.821848, "o", "e"] +[528.876906, "o", " "] +[529.052799, "o", "0"] +[529.726391, "o", "\r\n"] +[529.726579, "o", "(gdb) "] +[531.685101, "o", "# the next item on the stack should be 0"] +[532.28286, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[3Pis looks like the timer interrupt"] +[532.958122, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[4Pprint (void *)(*(uint32_t *)$esp)"] +[533.365021, "o", "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b*(uint32_t *)$esp))"] +[533.700966, "o", "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b(*(uint32_t *)$esp)"] +[534.021436, "o", "\b"] +[534.221855, "o", "\b"] +[534.373191, "o", "\b"] +[534.530437, "o", "\b"] +[534.692778, "o", "\b"] +[536.140976, "o", "($esp)\b\b\b\b\b"] +[536.886188, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[537.029022, "o", "\b"] +[537.809435, "o", "+)\b"] +[537.989237, "o", "4)\b"] +[538.365079, "o", "\u001b[C)\b"] +[539.157186, "o", "\r\n"] +[539.157943, "o", "$3 = (void *) \u001b[34m0x0\u001b[m\r\n(gdb) "] +[541.245538, "o", "#"] +[541.460923, "o", " "] +[541.588649, "o", "t"] +[541.708909, "o", "h"] +[541.828839, "o", "e"] +[541.909914, "o", " "] +[542.068789, "o", "n"] +[542.109349, "o", "e"] +[542.166431, "o", "x"] +[542.469912, "o", "t"] +[542.558985, "o", " "] +[542.725111, "o", "s"] +[542.813268, "o", "h"] +[542.956594, "o", "o"] +[543.036718, "o", "u"] +[543.185591, "o", "l"] +[543.269565, "o", "d"] +[543.340548, "o", " "] +[543.494185, "o", "b"] +[543.564801, "o", "e"] +[543.637447, "o", " "] +[544.053085, "o", "t"] +[544.124773, "o", "h"] +[544.250569, "o", "e"] +[544.290687, "o", " "] +[544.404605, "o", "o"] +[544.565768, "o", "l"] +[544.65011, "o", "d"] +[544.764418, "o", 
" "] +[546.425323, "o", "E"] +[547.028847, "o", "I"] +[547.16456, "o", "P"] +[548.300627, "o", "\r\n"] +[548.300792, "o", "(gdb) "] +[548.989397, "o", "# the next should be the old EIP"] +[549.173162, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cprint (void *)(*(uint32_t *)($esp+4))"] +[549.899257, "o", "\b"] +[550.129437, "o", "\b"] +[550.282812, "o", "\b"] +[552.748678, "o", "\u001b[1P))\b\b"] +[553.452994, "o", "8))\b\b"] +[554.357074, "o", "\r\n"] +[554.357422, "o", "$4 = (void *) \u001b[34m0xc15dcb62\u001b[m <\u001b[33mdefault_idle\u001b[m+18>\r\n(gdb) "] +[555.42081, "o", "#"] +[555.706632, "o", " "] +[555.853611, "o", "t"] +[555.981142, "o", "h"] +[556.060689, "o", "i"] +[556.180769, "o", "s"] +[556.276646, "o", " "] +[556.452498, "o", "l"] +[556.645524, "o", "o"] +[556.777618, "o", "o"] +[556.860672, "o", "k"] +[557.01089, "o", "s"] +[557.060819, "o", " "] +[557.204509, "o", "l"] +[557.364288, "o", "i"] +[557.524835, "o", "k"] +[557.597177, "o", "e"] +[557.71496, "o", " "] +[557.821078, "o", "a"] +[557.916565, "o", " "] +[558.074038, "o", "k"] +[558.189048, "o", "e"] +[558.261556, "o", "r"] +[558.308555, "o", "n"] +[558.404654, "o", "e"] +[558.500443, "o", "l"] +[558.596666, "o", " "] +[559.580743, "o", "f"] +[559.70893, "o", "u"] +[559.788765, "o", "n"] +[559.885141, "o", "c"] +[560.121613, "o", "t"] +[560.162133, "o", "i"] +[560.209714, "o", "o"] +[560.613052, "o", "n"] +[561.118427, "o", "\r\n(gdb) "] +[561.62926, "o", "#"] +[561.836781, "o", " "] +[561.988412, "o", "t"] +[562.10054, "o", "h"] +[562.164515, "o", "i"] +[562.261046, "o", "s"] +[562.3572, "o", " "] +[562.724997, "o", "m"] +[562.805154, "o", "e"] +[562.892772, "o", "a"] +[563.028918, "o", "n"] +[563.15677, "o", "s"] +[563.493188, "o", " "] +[563.741062, "o", "t"] +[563.886535, "o", "h"] +[563.957787, "o", "a"] +[564.029028, "o", "t"] +[564.180631, "o", " "] +[564.673213, "o", "w"] +[564.781861, "o", "e"] +[564.951458, "o", " "] +[567.116541, "o", "h"] +[567.164956, "o", "a"] 
+[567.385305, "o", "d"] +[567.533309, "o", " "] +[570.307091, "o", "a"] +[570.973109, "o", "n"] +[571.052577, "o", " "] +[571.196558, "o", "i"] +[571.276673, "o", "n"] +[571.356641, "o", "t"] +[571.437049, "o", "e"] +[571.573147, "o", "r"] +[571.733301, "o", "r"] +[571.820704, "o", "u"] +[571.886457, "o", "p"] +[571.980515, "o", "t"] +[572.061059, "o", " "] +[572.212575, "o", "w"] +[572.26865, "o", "i"] +[572.44514, "o", "t"] +[572.50106, "o", "h"] +[572.649694, "o", " "] +[572.810376, "o", "a"] +[573.596771, "o", "\b\u001b[K"] +[573.719587, "o", "\b\u001b[K"] +[573.948986, "o", "o"] +[574.060044, "o", "u"] +[574.200126, "o", "t"] +[574.245857, "o", " "] +[574.380757, "o", "a"] +[574.452561, "o", " "] +[574.9488, "o", "p"] +[575.129864, "o", "r"] +[575.246245, "o", "i"] +[575.436917, "o", "v"] +[575.500921, "o", "i"] +[575.708822, "o", "l"] +[575.788518, "o", "e"] +[576.069274, "o", "d"] +[576.335142, "o", "g"] +[576.405962, "o", "e"] +[577.434405, "o", " "] +[577.67696, "o", "t"] +[577.88499, "o", "r"] +[577.980284, "o", "a"] +[578.876849, "o", "s"] +[578.980703, "o", "i"] +[579.149228, "o", "t"] +[579.221513, "o", "i"] +[579.26884, "o", "o"] +[579.41719, "o", "n"] +[579.677291, "o", "\r\n(gdb) "] +[589.020658, "o", "#"] +[589.341219, "o", " "] +[589.660946, "o", "s"] +[589.676545, "o", "o"] +[589.89489, "o", " "] +[591.052951, "o", "t"] +[591.257618, "o", "h"] +[591.35662, "o", "e"] +[592.084943, "o", " "] +[592.410388, "o", "n"] +[592.465233, "o", "e"] +[592.604429, "o", "x"] +[592.900822, "o", "t"] +[593.012732, "o", " "] +[595.93505, "o", "v"] +[596.021101, "o", "a"] +[596.124525, "o", "l"] +[596.372663, "o", "u"] +[596.516986, "o", "e"] +[596.533116, "o", " "] +[596.716689, "o", "o"] +[596.916453, "o", "n"] +[597.012654, "o", " "] +[597.149, "o", "s"] +[597.33301, "o", "t"] +[597.396858, "o", "a"] +[597.605076, "o", "c"] +[597.716706, "o", "k"] +[597.781734, "o", " "] +[597.970938, "o", "s"] +[598.061845, "o", "h"] +[598.148867, "o", "o"] +[598.266043, "o", 
"u"] +[598.412427, "o", "l"] +[598.492956, "o", "d"] +[598.58072, "o", " "] +[598.748929, "o", "b"] +[598.828818, "o", "e"] +[598.986377, "o", " "] +[600.612464, "o", "t"] +[600.701426, "o", "h"] +[600.828697, "o", "e"] +[600.908559, "o", " "] +[604.24482, "o", "o"] +[604.501075, "o", "l"] +[604.637138, "o", "d"] +[604.810185, "o", " "] +[605.22084, "o", "C"] +[605.340414, "o", "S"] +[609.981442, "o", " "] +[610.100291, "o", "a"] +[610.245237, "o", "n"] +[610.285745, "o", "d"] +[610.436727, "o", " "] +[610.55753, "o", "t"] +[610.668519, "o", "h"] +[610.75681, "o", "e"] +[610.860497, "o", " "] +[612.298872, "o", "o"] +[612.500901, "o", "l"] +[612.54844, "o", "d"] +[612.645586, "o", " "] +[612.86999, "o", "E"] +[613.068337, "o", "F"] +[613.14841, "o", "L"] +[613.228911, "o", "A"] +[613.389277, "o", "G"] +[613.509013, "o", "S"] +[614.292855, "o", "\r\n"] +[614.29299, "o", "(gdb) "] +[615.946645, "o", "# so the next value on stack should be the old CS and the old EFLAGS"] +[616.276692, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cthis means that we had an interrupt without a priviledge trasition"] +[616.484792, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[33Plooks like a kernel function"] +[617.009785, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cprint (void *)(*(uint32_t *)($esp+8))"] +[617.540423, "o", "\b"] +[617.69268, "o", "\b"] +[617.836936, "o", "\b"] +[618.204688, "o", "\u001b[1P))\b\b"] +[620.260979, "o", "1))\b\b"] +[620.381284, "o", "2))\b\b"] +[620.932544, "o", "\r\n"] +[620.933245, "o", "$5 = (void *) \u001b[34m0x60\u001b[m\r\n(gdb) "] +[632.741989, "o", "p"] +[632.876823, "o", "r"] +[633.012803, "o", "i"] +[633.08155, "o", "n"] +[633.188878, "o", "t"] +[633.757256, "o", " "] +[634.436865, "o", "/"] +[634.937002, "o", "x"] +[635.085661, "o", " "] +[635.388506, "o", "$"] +[636.389874, "o", "c"] +[636.468457, "o", "s"] +[636.764836, "o", "\r\n"] 
+[636.765008, "o", "$6 = 0x60\r\n(gdb) "] +[641.676586, "o", "#"] +[641.884708, "o", " "] +[641.996872, "o", "a"] +[642.091842, "o", "s"] +[642.196689, "o", " "] +[642.364633, "o", "e"] +[642.484294, "o", "x"] +[642.802488, "o", "p"] +[643.045976, "o", "e"] +[643.164662, "o", "c"] +[643.444844, "o", "t"] +[643.508529, "o", "e"] +[643.708582, "o", "d"] +[643.884817, "o", " "] +[651.548692, "o", "t"] +[653.215508, "o", "h"] +[653.281727, "o", "e"] +[653.413871, "o", " "] +[653.644609, "o", "s"] +[653.772321, "o", "a"] +[653.908641, "o", "m"] +[654.164845, "o", "e"] +[654.828484, "o", " "] +[655.301976, "o", "C"] +[655.497877, "o", "S"] +[655.781698, "o", " "] +[656.109023, "o", "v"] +[656.172724, "o", "a"] +[656.308716, "o", "l"] +[656.540506, "o", "u"] +[656.655112, "o", "e"] +[662.412599, "o", " "] +[666.32495, "o", "\b\u001b[K"] +[682.380362, "o", ","] +[682.516746, "o", " "] +[682.812541, "o", "L"] +[683.084916, "o", "ui"] +[683.17327, "o", "n"] +[683.260224, "o", "u"] +[683.34001, "o", "x"] +[683.388145, "o", " "] +[683.805756, "o", "\b\u001b[K"] +[683.924955, "o", "\b\u001b[K"] +[684.052826, "o", "\b\u001b[K"] +[684.187913, "o", "\b\u001b[K"] +[684.316421, "o", "\b\u001b[K"] +[684.452334, "o", "\b\u001b[K"] +[684.588384, "o", "i"] +[684.78853, "o", "n"] +[684.851703, "o", "u"] +[684.951339, "o", "x"] +[685.045154, "o", " "] +[685.154943, "o", "u"] +[685.236579, "o", "s"] +[685.396194, "o", "e"] +[685.50896, "o", "s"] +[685.62021, "o", " "] +[685.748478, "o", "t"] +[685.887377, "o", "e"] +[686.001983, "o", " "] +[686.332372, "o", "\b\u001b[K"] +[686.461598, "o", "\b\u001b[K"] +[686.644619, "o", "h"] +[686.700338, "o", "e"] +[686.7799, "o", " "] +[686.908468, "o", "s"] +[686.996325, "o", "a"] +[687.068521, "o", "m"] +[687.229012, "o", "e"] +[687.306456, "o", " "] +[687.692433, "o", "c"] +[687.79635, "o", "o"] +[687.916226, "o", "d"] +[687.956423, "o", "e"] +[688.052213, "o", " "] +[688.155815, "o", "s"] +[688.297346, "o", "e"] +[688.411731, "o", "l"] 
+[688.508792, "o", "e"] +[688.589641, "o", "c"] +[688.80467, "o", "t"] +[688.884059, "o", "o"] +[688.972059, "o", "r"] +[689.837294, "o", "\r\n(gdb) "] +[690.13223, "o", "#"] +[690.276507, "o", " "] +[690.468264, "o", "f"] +[690.572263, "o", "o"] +[690.699264, "o", "r"] +[690.983585, "o", " "] +[691.284204, "o", "a"] +[691.372385, "o", "l"] +[691.524311, "o", "l"] +[691.572643, "o", " "] +[691.756121, "o", "t"] +[691.884368, "o", "h"] +[691.968393, "o", "e"] +[692.060258, "o", " "] +[692.396261, "o", "L"] +[692.644795, "o", "i"] +[692.772298, "o", "n"] +[692.846376, "o", "u"] +[692.950449, "o", "x"] +[693.074016, "o", " "] +[693.412476, "o", "k"] +[693.54072, "o", "e"] +[693.60488, "o", "r"] +[693.652141, "o", "n"] +[693.781041, "o", "e"] +[693.842522, "o", "l"] +[693.932546, "o", " "] +[694.093549, "o", "c"] +[694.188975, "o", "o"] +[694.325315, "o", "d"] +[694.372717, "o", "e"] +[699.204488, "o", "\r\n"] +[699.204575, "o", "(gdb) "] +[699.694945, "o", "#"] +[700.025409, "o", " "] +[701.082078, "o", "a"] +[701.219865, "o", "n"] +[701.300717, "o", "d"] +[701.428262, "o", " "] +[701.516018, "o", "a"] +[701.620252, "o", " "] +[701.762559, "o", "d"] +[701.884545, "o", "i"] +[701.964346, "o", "f"] +[702.396072, "o", "f"] +[702.644607, "o", "e"] +[702.772976, "o", "r"] +[702.857454, "o", "e"] +[702.979937, "o", "n"] +[703.105594, "o", "t"] +[703.205114, "o", " "] +[704.469353, "o", "s"] +[704.684404, "o", "e"] +[704.892085, "o", "l"] +[704.948089, "o", "e"] +[705.01199, "o", "c"] +[705.252192, "o", "t"] +[705.378083, "o", "o"] +[705.45294, "o", "r"] +[705.620379, "o", " "] +[705.788126, "o", "f"] +[705.996593, "o", "o"] +[706.156069, "o", "r"] +[706.324525, "o", " "] +[706.563158, "o", "u"] +[706.619009, "o", "s"] +[706.668009, "o", "e"] +[706.738935, "o", "r"] +[707.340183, "o", " "] +[707.444058, "o", "c"] +[707.564119, "o", "o"] +[707.684062, "o", "d"] +[707.740063, "o", "e"] +[709.035117, "o", " "] +[709.180598, "o", "a"] +[709.228019, "o", "s"] +[709.349715, "o", " 
"] +[709.444522, "o", "w"] +[709.564695, "o", "e"] +[709.588558, "o", "i"] +[709.803763, "o", "l"] +[710.268131, "o", "\b\u001b[K"] +[710.421074, "o", "\b\u001b[K"] +[710.581086, "o", "\b\u001b[K"] +[711.029269, "o", "e"] +[711.172267, "o", " "] +[711.356432, "o", "w"] +[711.452408, "o", "i"] +[711.604825, "o", "l"] +[711.760514, "o", "l"] +[711.809979, "o", " "] +[712.108216, "o", "s"] +[712.284343, "o", "e"] +[712.420395, "o", "e"] +[712.531941, "o", "\r\n"] +[712.531998, "o", "(gdb) "] +[717.364429, "o", "# and a different selector for user code as we will see"] +[717.579692, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[24Pfor all the Linux kernel code"] +[718.022257, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cas expected the same CS value, Linux uses the same code selector"] +[718.427912, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cprint /x $cs\u001b[K"] +[718.843933, "o", "\b\b\b\b\b\b(void *)(*(uint32_t *)($esp+12))"] +[719.45271, "o", "\b"] +[719.612108, "o", "\b"] +[719.748109, "o", "\b"] +[720.321698, "o", "\u001b[1P))\b\b"] +[722.604741, "o", "6))\b\b"] +[723.284228, "o", "\r\n"] +[723.284909, "o", "$7 = (void *) \u001b[34m0x200246\u001b[m\r\n(gdb) "] +[732.228199, "o", "#"] +[732.483837, "o", " "] +[732.66115, "o", "t"] +[732.804463, "o", "h"] +[732.886263, "o", "e"] +[733.104678, "o", "s"] +[733.16471, "o", "e"] +[733.692292, "o", " "] +[733.789016, "o", "a"] +[733.937223, "o", "r"] +[734.012056, "o", "e"] +[734.069023, "o", " "] +[734.220446, "o", "t"] +[734.286456, "o", "h"] +[734.380547, "o", "e"] +[734.467934, "o", " "] +[734.596123, "o", "o"] +[734.981382, "o", "l"] +[735.133112, "o", "d"] +[735.268121, "o", " "] +[736.004384, "o", "E"] +[736.201581, "o", "F"] +[736.284997, "o", "L"] +[736.38164, "o", "A"] +[736.540379, "o", "G"] +[736.716525, "o", "S"] +[737.628709, "o", "\r\n"] +[737.628991, "o", "(gdb) "] +[756.459919, "o", "#"] +[756.713355, "o", " "] +[756.972562, "o", 
"l"] +[757.372058, "o", "e"] +[757.555961, "o", "t"] +[757.715745, "o", "s"] +[757.811866, "o", " "] +[757.900933, "o", "s"] +[758.100942, "o", "t"] +[758.252254, "o", "e"] +[758.836667, "o", "p"] +[759.136686, "o", " "] +[759.34112, "o", "t"] +[759.619803, "o", "h"] +[759.723904, "o", "o"] +[759.740077, "o", "r"] +[759.892415, "o", "u"] +[759.988008, "o", "g"] +[760.092285, "o", "h"] +[760.209389, "o", "t"] +[760.52954, "o", "\b\u001b[K"] +[760.659839, "o", "\b\u001b[K"] +[760.796107, "o", "\b\u001b[K"] +[760.924285, "o", "\b\u001b[K"] +[761.060871, "o", "\b\u001b[K"] +[761.49694, "o", "\b\u001b[K"] +[761.677155, "o", "r"] +[761.77997, "o", "o"] +[761.940033, "o", "u"] +[762.444551, "o", "g"] +[762.500741, "o", "h"] +[762.62848, "o", " "] +[762.752533, "o", "t"] +[762.795851, "o", "h"] +[762.901139, "o", "e"] +[762.972318, "o", " "] +[763.076113, "o", "e"] +[763.163768, "o", "x"] +[763.891933, "o", "c"] +[763.964072, "o", "e"] +[764.075829, "o", "p"] +[764.220556, "o", "t"] +[764.299718, "o", "i"] +[764.339842, "o", "o"] +[764.500348, "o", "n"] +[764.580435, "o", " "] +[764.749173, "o", "h"] +[764.805422, "o", "a"] +[764.940176, "o", "n"] +[765.00449, "o", "d"] +[765.172277, "o", "l"] +[765.259869, "o", "e"] +[765.36498, "o", "r"] +[767.092697, "o", "\r\n(gdb) "] +[769.452692, "o", "n"] +[769.492828, "o", "e"] +[769.60402, "o", "x"] +[769.852194, "o", "t"] +[770.75791, "o", "\r\n"] +[770.817206, "o", "\u001b[33mhandle_exception\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1155\r\n1155\t\tENCODE_FRAME_POINTER\r\n(gdb) "] +[771.979803, "o", "\r\n"] +[771.984483, "o", "1158\t\tGS_TO_REG %ecx\r\n(gdb) "] +[772.901642, "o", "\r\n"] +[772.904154, "o", "1159\t\tmovl\tPT_GS(%esp), %edi\t\t# get the function address\r\n(gdb) "] +[774.044833, "o", "\r\n"] +[774.047555, "o", "1160\t\tREG_TO_PTGS %ecx\r\n(gdb) "] +[775.068367, "o", "\r\n"] +[775.071481, "o", "1161\t\tSET_KERNEL_GS %ecx\r\n(gdb) "] +[776.020547, "o", "\r\n"] +[776.024942, "o", 
"1164\t\tmovl\tPT_ORIG_EAX(%esp), %edx\t\t# get the error code\r\n(gdb) "] +[777.372032, "o", "\r\n"] +[777.374254, "o", "1165\t\tmovl\t$-1, PT_ORIG_EAX(%esp)\t\t# no syscall to restart\r\n(gdb) "] +[778.932072, "o", "\r\n"] +[778.935109, "o", "1167\t\tmovl\t%esp, %eax\t\t\t# pt_regs pointer\r\n(gdb) "] +[780.084544, "o", "\r\n"] +[780.087079, "o", "1168\t\tCALL_NOSPEC edi\r\n"] +[780.087242, "o", "(gdb) "] +[789.068946, "o", "t"] +[789.163754, "o", "h"] +[789.227926, "o", "i"] +[789.364066, "o", "s"] +[789.475736, "o", " "] +[789.77071, "o", "s"] +[789.845352, "o", "h"] +[789.900666, "o", "o"] +[790.01865, "o", "u"] +[790.164322, "o", "l"] +[790.212024, "o", "d"] +[790.379831, "o", " "] +[790.843865, "o", "c"] +[790.948496, "o", "a"] +[790.98792, "o", "l"] +[791.099624, "o", "l"] +[791.179854, "o", " "] +[791.884188, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[792.124312, "o", "\u001b[1@#"] +[792.300147, "o", "\u001b[1@ "] +[792.523828, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[793.004183, "o", "t"] +[793.108772, "o", "h"] +[793.244482, "o", "e"] +[793.283931, "o", " "] +[794.948292, "o", "a"] +[795.052886, "o", "c"] +[795.267741, "o", "t"] +[795.332027, "o", "u"] +[795.443685, "o", "a"] +[795.53154, "o", "l"] +[795.643762, "o", " "] +[795.755946, "o", "e"] +[795.875998, "o", "x"] +[796.076283, "o", "c"] +[796.163912, "o", "e"] +[796.291666, "o", "p"] +[796.507701, "o", "t"] +[796.564221, "o", "i"] +[796.619819, "o", "o"] +[796.787619, "o", "n"] +[796.876639, "o", " "] +[797.227727, "o", "h"] +[797.347798, "o", "a"] +[797.453011, "o", "n"] +[797.531866, "o", "d"] +[797.643736, "o", "l"] +[797.731685, "o", "e"] +[797.811748, "o", "r"] +[798.716546, "o", "\r\n(gdb) "] +[799.81999, "o", "p"] +[799.969817, "o", "r"] +[800.040246, "o", "i"] +[800.120984, "o", "n"] +[800.180163, "o", "t"] +[800.347919, "o", " "] +[800.969409, "o", "/"] +[801.120768, 
"o", "x"] +[801.221364, "o", " "] +[801.500176, "o", "$"] +[802.020108, "o", "e"] +[802.260097, "o", "d"] +[802.451977, "o", "i"] +[803.732078, "o", "\r\n"] +[803.732203, "o", "$8 = 0xc15d3840\r\n(gdb) "] +[806.04535, "o", "print /x $edi"] +[806.435671, "o", "\b"] +[806.952968, "o", "\b"] +[806.979538, "o", "\b"] +[807.01104, "o", "\b"] +[807.042755, "o", "\b"] +[807.604211, "o", "\b\u001b[1P $edi\b\b\b\b\b"] +[807.724321, "o", "\b\u001b[1P $edi\b\b\b\b\b"] +[808.12947, "o", "( $edi\b\b\b\b\b"] +[808.30103, "o", "v $edi\b\b\b\b\b"] +[808.400349, "o", "o $edi\b\b\b\b\b"] +[808.459861, "o", "i $edi\b\b\b\b\b"] +[808.54765, "o", "d $edi\b\b\b\b\b"] +[808.691701, "o", "\u001b[C $edi\b\b\b\b\b"] +[808.964799, "o", "* $edi\b\b\b\b\b"] +[809.100734, "o", ") $edi\b\b\b\b\b"] +[809.780431, "o", "\u001b[1P$edi\b\b\b\b"] +[810.188463, "o", "\r\n"] +[810.188589, "o", "$9 = (void *) \u001b[34m0xc15d3840\u001b[m <\u001b[33msysvec_apic_timer_interrupt\u001b[m>\r\n(gdb) "] +[820.684388, "o", "s"] +[821.316168, "o", "t"] +[821.396037, "o", "e"] +[821.563919, "o", "p"] +[821.819873, "o", "i"] +[823.507985, "o", "\r\n"] +[823.511475, "o", "\u001b[34m0xc15e0b05\u001b[m in \u001b[33m__x86_retpoline_edi\u001b[m ()\u001b[m\r\n \u001b[m at \u001b[32m./arch/x86/include/asm/GEN-for-each-reg.h\u001b[m:23\r\n23\tGEN(edi)\r\n(gdb) "] +[827.219827, "o", "b"] +[827.348216, "o", "t"] +[828.148173, "o", "\r\n"] +[828.14836, "o", "#0 \u001b[34m0xc15e0b05\u001b[m in \u001b[33m__x86_retpoline_edi\u001b[m ()\u001b[m\r\n \u001b[m at \u001b[32m./arch/x86/include/asm/GEN-for-each-reg.h\u001b[m:23\r\n#1 \u001b[34m0xc15deb5f\u001b[m in \u001b[33mhandle_exception\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1168\r\n"] +[828.149581, "o", "#2 \u001b[34m0x00000000\u001b[m in \u001b[33m??\u001b[m ()\r\n"] +[828.150395, "o", "(gdb) "] +[829.675679, "o", "s"] +[829.880802, "o", "t"] +[829.945607, "o", "e"] +[829.998494, "o", "p"] +[830.140786, "o", "i"] +[830.467985, "o", "\r\n"] +[830.470567, "o", 
"\u001b[34m0xc15e0b11\u001b[m in \u001b[33m__x86_retpoline_edi\u001b[m ()\u001b[m\r\n \u001b[m at \u001b[32m./arch/x86/include/asm/GEN-for-each-reg.h\u001b[m:23\r\n23\tGEN(edi)\r\n"] +[830.470677, "o", "(gdb) "] +[831.507846, "o", "\r\n"] +[831.510731, "o", "\u001b[34m0xc15e0b14\u001b[m\t23\tGEN(edi)\r\n(gdb) "] +[833.044138, "o", "\r\n"] +[833.047651, "o", "\u001b[33msysvec_apic_timer_interrupt\u001b[m (\u001b[36mregs\u001b[m=0xc17cfebc)\u001b[m\r\n \u001b[m at \u001b[32march/x86/kernel/apic/apic.c\u001b[m:1091\r\n"] +[833.047776, "o", "1091\tDEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)\r\n(gdb) "] +[837.992379, "o", "b"] +[838.082464, "o", "t"] +[838.468115, "o", "\r\n"] +[838.468954, "o", "#0 \u001b[33msysvec_apic_timer_interrupt\u001b[m (\u001b[36mregs\u001b[m=0xc17cfebc)\u001b[m\r\n \u001b[m at \u001b[32march/x86/kernel/apic/apic.c\u001b[m:1091\r\n#1 \u001b[34m0xc15deb5f\u001b[m in \u001b[33mhandle_exception\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1168\r\n"] +[838.470231, "o", "#2 \u001b[34m0x00000000\u001b[m in \u001b[33m??\u001b[m ()\r\n"] +[838.47094, "o", "(gdb) "] +[840.947473, "o", "#"] +[841.144354, "o", " "] +[841.360793, "o", "o"] +[841.42971, "o", "k"] +[842.204191, "o", ","] +[842.341979, "o", " "] +[842.444332, "o", "w"] +[842.539968, "o", "e"] +[842.677559, "o", " "] +[842.867869, "o", "r"] +[842.932437, "o", "e"] +[843.051893, "o", "a"] +[843.267887, "o", "c"] +[843.717425, "o", "h"] +[844.315499, "o", " "] +[844.976743, "o", "\b\u001b[K"] +[845.116092, "o", "e"] +[845.283572, "o", "d"] +[845.419727, "o", " "] +[845.579645, "o", "t"] +[845.69171, "o", "h"] +[845.784239, "o", "e"] +[845.884895, "o", " "] +[846.475528, "o", "e"] +[846.992223, "o", "x"] +[847.779862, "o", "c"] +[847.899595, "o", "e"] +[848.012177, "o", "p"] +[848.237069, "o", "t"] +[848.299719, "o", "i"] +[848.355554, "o", "o"] +[848.548387, "o", "n"] +[848.668273, "o", " "] +[848.883368, "o", "h"] +[848.924119, "o", "a"] +[849.102725, "o", "n"] 
+[849.195892, "o", "d"] +[849.320334, "o", "l"] +[849.403392, "o", "e"] +[849.469381, "o", "r"] +[849.555461, "o", " "] +[849.683646, "o", "f"] +[849.764172, "o", "u"] +[849.82762, "o", "n"] +[849.940511, "o", "c"] +[850.140061, "o", "t"] +[850.188329, "o", "i"] +[850.236049, "o", "o"] +[850.403561, "o", "n"] +[851.244482, "o", "\r\n(gdb) "] +[851.940142, "o", "#"] +[852.116219, "o", " "] +[852.362087, "o", "l"] +[852.425526, "o", "e"] +[852.572158, "o", "t"] +[852.899784, "o", "s"] +[853.075543, "o", " "] +[853.252474, "o", "s"] +[853.360745, "o", "k"] +[853.555665, "o", "i"] +[853.756754, "o", "p"] +[853.893804, "o", " "] +[854.067629, "o", "w"] +[854.179574, "o", "o"] +[854.479805, "o", "\b\u001b[K"] +[854.580246, "o", "\b\u001b[K"] +[854.691967, "o", "t"] +[854.756166, "o", "o"] +[854.866408, "o", " "] +[854.916837, "o", "t"] +[855.051746, "o", "h"] +[855.13964, "o", "e"] +[855.203583, "o", " "] +[855.484009, "o", "e"] +[855.593676, "o", "n"] +[855.675946, "o", "d"] +[856.363844, "o", "\r\n"] +[856.363913, "o", "(gdb) "] +[857.411824, "o", "f"] +[857.459708, "o", "i"] +[857.596332, "o", "n"] +[857.644121, "o", "i"] +[857.716486, "o", "s"] +[857.825967, "o", "h"] +[859.756006, "o", "\r\n"] +[859.756719, "o", "Run till exit from #0 \u001b[33msysvec_apic_timer_interrupt\u001b[m (\u001b[36mregs\u001b[m=0xc17cfebc)\u001b[m\r\n \u001b[m at \u001b[32march/x86/kernel/apic/apic.c\u001b[m:1091\r\n"] +[859.763094, "o", "\u001b[33mhandle_exception\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1179\r\n1179\t\tmovl\tPT_CS(%esp), %eax\r\n"] +[859.763267, "o", "(gdb) "] +[862.512345, "o", "n"] +[864.404212, "o", "\r\n"] +[864.406527, "o", "1180\t\tandl\t$SEGMENT_RPL_MASK, %eax\r\n"] +[864.406578, "o", "(gdb) "] +[865.654797, "o", "n"] +[866.267877, "o", "\r\n"] +[866.27079, "o", "1182\t\tcmpl\t$USER_RPL, %eax\t\t\t# returning to v8086 or userspace ?\r\n(gdb) "] +[867.360688, "o", "n"] +[867.708304, "o", "\r\n"] +[867.710843, "o", "1183\t\tjnb\tret_to_user\r\n"] 
+[867.710968, "o", "(gdb) "] +[868.899288, "o", "n"] +[870.284212, "o", "\r\n"] +[870.287095, "o", "1185\t\tPARANOID_EXIT_TO_KERNEL_MODE\r\n"] +[870.287206, "o", "(gdb) "] +[871.396245, "o", "n"] +[872.356138, "o", "\r\n"] +[872.360073, "o", "1187\t\tRESTORE_REGS 4\r\n(gdb) "] +[873.580284, "o", "n"] +[874.020225, "o", "\r\n"] +[874.06255, "o", "\u001b[33mhandle_exception\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1188\r\n1188\t\tjmp\t.Lirq_return\r\n(gdb) "] +[880.227976, "o", "s"] +[880.563745, "o", "t"] +[880.663906, "o", "e"] +[880.772059, "o", "p"] +[881.016269, "o", "i"] +[885.107968, "o", "\r\n"] +[885.1111, "o", "\u001b[33mentry_INT80_32\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1080\r\n1080\t\tINTERRUPT_RETURN\r\n(gdb) "] +[886.896524, "o", "d"] +[887.002145, "o", "i"] +[887.17178, "o", "s"] +[887.267258, "o", "a"] +[887.427617, "o", "s"] +[888.193298, "o", "semble "] +[888.985353, "o", "$"] +[889.2295, "o", "p"] +[889.6279, "o", "c"] +[889.755674, "o", ","] +[890.130259, "o", "+"] +[890.523791, "o", "4"] +[892.275975, "o", "\r\n"] +[892.276128, "o", "Dump of assembler code from 0xc15dea1e to 0xc15dea22:\r\n"] +[892.276217, "o", "=> \u001b[34m0xc15dea1e\u001b[m <\u001b[33mentry_INT80_32\u001b[m+426>:\tiret \r\n \u001b[34m0xc15dea1f\u001b[m <\u001b[33mhandle_exception\u001b[m+0>:\tcld \r\n"] +[892.276254, "o", " \u001b[34m0xc15dea20\u001b[m <\u001b[33mhandle_exception\u001b[m+1>:\tpush %fs\r\nEnd of assembler dump.\r\n"] +[892.276365, "o", "(gdb) "] +[900.885286, "o", "#"] +[901.051387, "o", " "] +[903.108694, "o", "w"] +[903.195526, "o", "e"] +[903.331464, "o", " "] +[903.507641, "o", "r"] +[903.579802, "o", "e"] +[903.651849, "o", "a"] +[903.827191, "o", "c"] +[903.915925, "o", "h"] +[904.018162, "o", "e"] +[904.084005, "o", "d"] +[904.193456, "o", " "] +[904.340005, "o", "t"] +[904.419249, "o", "h"] +[904.50751, "o", "e"] +[904.619508, "o", " "] +[905.050041, "o", "e"] +[905.142961, "o", "n"] +[905.242133, "o", "d"] 
+[905.356107, "o", " "] +[906.418504, "o", "o"] +[906.483992, "o", "f"] +[906.571581, "o", " "] +[906.731762, "o", "t"] +[906.788765, "o", "h"] +[906.915517, "o", "e"] +[907.019539, "o", " "] +[907.226477, "o", "e"] +[907.376179, "o", "x"] +[907.627505, "o", "c"] +[908.25994, "o", "e"] +[908.371671, "o", "p"] +[908.580147, "o", "t"] +[908.635847, "o", "i"] +[908.675142, "o", "o"] +[908.868113, "o", "n"] +[909.147401, "o", " "] +[910.283243, "o", "h"] +[910.34774, "o", "a"] +[910.539401, "o", "n"] +[910.94662, "o", "d"] +[911.099697, "o", "l"] +[912.379334, "o", "i"] +[912.463978, "o", "n"] +[912.578486, "o", "g"] +[915.172176, "o", "\r\n"] +[915.17238, "o", "(gdb) "] +[915.627306, "o", "#"] +[915.802899, "o", " "] +[916.01211, "o", "i"] +[916.132261, "o", "r"] +[916.480252, "o", "e"] +[916.740283, "o", "t"] +[916.875718, "o", " "] +[917.027976, "o", "t"] +[917.707571, "o", "\b\u001b[K"] +[917.850998, "o", "w"] +[917.987234, "o", "i"] +[918.164289, "o", "l"] +[918.345294, "o", "l"] +[918.428587, "o", " "] +[921.142498, "o", "p"] +[921.315361, "o", "o"] +[921.379736, "o", "p"] +[922.235945, "o", " "] +[922.523391, "o", "t"] +[922.65927, "o", "h"] +[922.77948, "o", "e"] +[922.931826, "o", " "] +[923.619616, "o", "o"] +[923.81968, "o", "l"] +[924.348218, "o", "d"] +[924.483915, "o", " "] +[924.803599, "o", "E"] +[925.091333, "o", "I"] +[925.204834, "o", "P"] +[925.859791, "o", ","] +[926.323847, "o", " "] +[926.628744, "o", "O"] +[926.836056, "o", "L"] +[926.972515, "o", "D"] +[927.555719, "o", " "] +[927.860637, "o", "C"] +[927.956656, "o", "S"] +[928.131232, "o", " "] +[928.283599, "o", "a"] +[928.379165, "o", "n"] +[928.499395, "o", "d"] +[928.572276, "o", " "] +[930.399792, "o", "o"] +[930.579592, "o", "l"] +[930.747191, "o", "d "] +[936.451423, "o", "E"] +[936.651497, "o", "F"] +[936.724116, "o", "L"] +[936.803501, "o", "A"] +[936.944129, "o", "G"] +[937.017988, "o", "S"] +[940.419551, "o", "\r\n(gdb) "] +[940.703548, "o", "#"] +[940.867379, "o", " "] 
+[941.131995, "o", "a"] +[941.236387, "o", "n"] +[941.315354, "o", "d"] +[941.428119, "o", " "] +[941.545304, "o", "r"] +[941.595714, "o", "e"] +[941.780241, "o", "s"] +[941.875308, "o", "u"] +[942.324162, "o", "m"] +[942.479946, "o", "e"] +[942.595786, "o", " "] +[942.724262, "o", "t"] +[942.828484, "o", "h"] +[942.914989, "o", "e"] +[943.019342, "o", " "] +[943.291437, "o", "p"] +[943.427129, "o", "r"] +[943.492285, "o", "e"] +[943.680718, "o", "v"] +[943.762965, "o", "i"] +[943.827585, "o", "o"] +[944.003395, "o", "u"] +[944.075376, "o", "s"] +[944.179349, "o", " "] +[944.620027, "o", "e"] +[944.800545, "o", "x"] +[944.880871, "o", "e"] +[945.076374, "o", "c"] +[945.355484, "o", "t"] +[945.451345, "o", "i"] +[945.499272, "o", "o"] +[945.699588, "o", "n"] +[945.868687, "o", " "] +[946.332003, "o", "f"] +[946.451626, "o", "l"] +[946.715244, "o", "o"] +[949.939268, "o", "w"] +[952.179651, "o", "\r\n(gdb) "] +[953.872763, "o", "s"] +[954.104233, "o", "t"] +[954.220454, "o", "e"] +[954.523396, "o", "p"] +[954.699106, "o", "i"] +[957.155393, "o", "\r\n"] +[957.157788, "o", "\u001b[34m0xc15dcb62\u001b[m in \u001b[33mdefault_idle\u001b[m () at \u001b[32m./arch/x86/include/asm/irqflags.h\u001b[m:60\r\n60\t\tasm volatile(\"sti; hlt\": : :\"memory\");\r\n"] +[957.157838, "o", "(gdb) "] +[959.275429, "o", "#"] +[959.427369, "o", " "] +[959.547282, "o", "a"] +[959.660394, "o", "n"] +[959.739359, "o", "d"] +[959.843798, "o", " "] +[959.9766, "o", "w"] +[960.047887, "o", "e"] +[960.172093, "o", " "] +[960.315666, "o", "a"] +[960.419302, "o", "r"] +[960.491251, "o", "e"] +[960.579224, "o", " "] +[960.77959, "o", "b"] +[960.851343, "o", "a"] +[960.915682, "o", "c"] +[961.049011, "o", "k"] +[961.201567, "o", " "] +[962.4203, "o", "i"] +[962.475459, "o", "n"] +[962.61159, "o", " "] +[962.714878, "o", "t"] +[962.851242, "o", "h"] +[962.931223, "o", "e"] +[963.051282, "o", " "] +[963.907009, "o", "o"] +[964.042918, "o", "r"] +[964.123059, "o", "i"] +[964.251116, "o", "g"] 
+[964.299084, "o", "i"] +[964.45136, "o", "n"] +[964.555122, "o", "a"] +[964.931034, "o", "l"] +[965.155018, "o", " "] +[972.371607, "o", "f"] +[972.555443, "o", "u"] +[972.643143, "o", "n"] +[972.76443, "o", "c"] +[973.027243, "o", "t"] +[973.075346, "o", "i"] +[973.13917, "o", "o"] +[973.344676, "o", "n"] +[973.499763, "o", " "] +[976.467642, "o", "\r\n"] +[976.467915, "o", "(gdb) "] +[996.435802, "o", "quit\r\n"] +[996.436065, "o", "A debugging session is active.\r\n\r\n\tInferior 1 [process 1] will be detached.\r\n\r\nQuit anyway? (y or n) "] +[997.892586, "o", "y"] +[998.379483, "o", "\r\nDetaching from program: /linux/vmlinux, process 1\r\n"] +[998.380309, "o", "Ending remote debugging.\r\n"] +[998.380413, "o", "[Inferior 1 (process 1) detached]\r\n"] +[998.387415, "o", "$ "] +[1004.003082, "o", "^L"] +[1004.78691, "o", "\b \b\b \b"] +[1005.515685, "o", "#"] +[1005.891313, "o", " "] +[1006.098914, "o", "n"] +[1006.171159, "o", "o"] +[1006.251338, "o", "w"] +[1006.379746, "o", " "] +[1006.642949, "o", "l"] +[1006.756392, "o", "e"] +[1006.972256, "o", "t"] +[1007.451427, "o", "s"] +[1008.78733, "o", " "] +[1011.707191, "o", "t"] +[1011.922977, "o", "r"] +[1012.075141, "o", "y"] +[1012.211589, "o", " "] +[1012.407443, "o", "t"] +[1012.496215, "o", "o"] +[1012.63914, "o", " "] +[1013.171266, "o", "d"] +[1013.280435, "o", "o"] +[1013.382917, "o", " "] +[1013.699697, "o", "a"] +[1015.067856, "o", " "] +[1017.747341, "o", "p"] +[1017.899377, "o", "r"] +[1018.02099, "o", "i"] +[1018.144684, "o", "v"] +[1018.219465, "o", "i"] +[1018.435513, "o", "l"] +[1018.578748, "o", "e"] +[1018.898886, "o", "d"] +[1019.160504, "o", "g"] +[1019.315664, "o", "e"] +[1020.651137, "o", " "] +[1020.795118, "o", "t"] +[1020.971166, "o", "r"] +[1021.085116, "o", "a"] +[1021.231631, "o", "n"] +[1021.387101, "o", "s"] +[1021.675097, "o", "i"] +[1021.867216, "o", "t"] +[1021.930784, "o", "i"] +[1021.994802, "o", "o"] +[1022.227196, "o", "n"] +[1022.34665, "o", " "] +[1032.083575, "o", 
"^[[D"] +[1032.923178, "o", "\b \b"] +[1033.050768, "o", "\b \b"] +[1033.195851, "o", "\b \b\b \b"] +[1033.355124, "o", "\b \b"] +[1034.275365, "o", "\r\n"] +[1034.275424, "o", "$ "] +[1035.090995, "o", "#"] +[1035.571157, "o", " "] +[1038.299127, "o", "f"] +[1038.418833, "o", "o"] +[1038.546673, "o", "r"] +[1038.643075, "o", " "] +[1038.811737, "o", "t"] +[1038.986934, "o", "h"] +[1039.075088, "o", "a"] +[1039.171894, "o", "t"] +[1039.290797, "o", " "] +[1039.714845, "o", "w"] +[1040.060445, "o", "e"] +[1040.579062, "o", " "] +[1040.715253, "o", "w"] +[1040.835144, "o", "i"] +[1041.008189, "o", "l"] +[1041.188661, "o", "l"] +[1062.274587, "o", " "] +[1063.994677, "o", "r"] +[1064.066586, "o", "u"] +[1064.258739, "o", "n"] +[1064.754719, "o", " "] +[1064.866336, "o", "a"] +[1064.95491, "o", " "] +[1069.779045, "o", "u"] +[1069.842686, "o", "s"] +[1069.898904, "o", "e"] +[1069.992062, "o", "r"] +[1070.090743, "o", "s"] +[1070.186611, "o", "p"] +[1070.271994, "o", "a"] +[1070.335417, "o", "c"] +[1070.435046, "o", "e"] +[1070.490858, "o", " "] +[1070.667137, "o", "l"] +[1070.843138, "o", "o"] +[1070.978461, "o", "o"] +[1071.042789, "o", "p"] +[1072.299918, "o", "\r\n$ "] +[1072.677072, "o", "#"] +[1072.8757, "o", " "] +[1073.123342, "o", "t"] +[1073.187116, "o", "o"] +[1073.234967, "o", " "] +[1073.64715, "o", "m"] +[1073.723177, "o", "a"] +[1074.003257, "o", "x"] +[1074.066757, "o", "i"] +[1074.242917, "o", "m"] +[1074.440397, "o", "e"] +[1074.951277, "o", "\b \b"] +[1075.082786, "o", "i"] +[1075.154841, "o", "z"] +[1075.306772, "o", "e"] +[1075.362669, "o", " "] +[1075.507002, "o", "t"] +[1075.563397, "o", "h"] +[1075.667216, "o", "e"] +[1075.759055, "o", " "] +[1075.971488, "o", "c"] +[1076.40312, "o", "h"] +[1076.538776, "o", "a"] +[1076.822943, "o", "n"] +[1077.090772, "o", "c"] +[1077.179219, "o", "e"] +[1077.274764, "o", " "] +[1077.427, "o", "o"] +[1077.538624, "o", "f"] +[1077.6266, "o", " "] +[1080.099022, "o", "c"] +[1080.220601, "o", "a"] +[1080.435652, 
"o", "t"] +[1080.642958, "o", "c"] +[1080.754845, "o", "h"] +[1080.890696, "o", "i"] +[1080.970811, "o", "n"] +[1081.066751, "o", "g"] +[1081.187186, "o", " "] +[1081.450963, "o", "a"] +[1081.535893, "o", " "] +[1082.419055, "o", "u"] +[1082.554783, "o", "s"] +[1082.707252, "o", "e"] +[1082.778665, "o", "r"] +[1082.907521, "o", " "] +[1083.155048, "o", "-"] +[1083.642635, "o", ">"] +[1083.82688, "o", " "] +[1083.995373, "o", "i"] +[1084.106729, "o", "r"] +[1084.15548, "o", "r"] +[1084.226868, "o", "q"] +[1084.523026, "o", " "] +[1084.722585, "o", "t"] +[1084.890951, "o", "r"] +[1084.955439, "o", "a"] +[1085.066325, "o", "n"] +[1085.170702, "o", "s"] +[1085.490659, "o", "i"] +[1085.791096, "o", "t"] +[1085.864736, "o", "i"] +[1085.89322, "o", "o"] +[1086.098742, "o", "n"] +[1086.763832, "o", " "] +[1093.283335, "o", "\r\n"] +[1093.285702, "o", "$ "] +[1094.41432, "o", "^[[A"] +[1094.786061, "o", "^[[A"] +[1095.389595, "o", "\b \b"] +[1095.895038, "o", "\b \b"] +[1095.9211, "o", "\b \b\b \b"] +[1095.953965, "o", "\b \b"] +[1095.985163, "o", "\b \b"] +[1096.0314, "o", "\b \b\b \b"] +[1096.289423, "o", "m"] +[1096.384651, "o", "i"] +[1096.91055, "o", "n"] +[1096.977922, "o", "i"] +[1097.17697, "o", "c"] +[1097.232856, "o", "o"] +[1097.320687, "o", "m"] +[1097.841256, "o", " "] +[1098.864769, "o", "-"] +[1099.174029, "o", "D"] +[1099.281202, "o", " "] +[1099.616635, "o", "s"] +[1099.680677, "o", "e"] +[1099.776722, "o", "r"] +[1099.840456, "o", "i"] +[1100.020817, "o", "a"] +[1100.128658, "o", "l"] +[1100.313642, "o", "."] +[1100.528928, "o", "p"] +[1100.712992, "o", "t"] +[1101.260834, "o", "s"] +[1101.369434, "o", "\r\n"] +[1101.370212, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[0m\u001b(B\u001b[?1h\u001b=\u001b[H\u001b[2J"] +[1101.370539, "o", "\u001b[?12l\u001b[?25h\nWelcome to minicom 2.7.1\r\n\nOPTIONS: I18n \r\nCompiled on Dec 23 2019, 02:06:26.\r\nPort serial.pts, 22:39:44\r\n\nPress CTRL-A Z for help on special keys\r\n\n"] +[1102.49074, "o", "\n"] 
+[1102.491207, "o", "Poky (Yocto Project Reference Distro) 2.3 qemux86 /dev/hvc0"] +[1102.491482, "o", "\r\n"] +[1102.492003, "o", "\n"] +[1102.492481, "o", "qemux86 login: "] +[1102.921642, "o", "r"] +[1103.001363, "o", "o"] +[1103.129677, "o", "o"] +[1103.217562, "o", "t"] +[1103.322957, "o", "\r\n"] +[1103.376683, "o", "root@qemux86:~# "] +[1104.675056, "o", "\r\n"] +[1104.676255, "o", "root@qemux86:~# "] +[1107.089642, "o", "i"] +[1107.298186, "o", "="] +[1107.529893, "o", "0"] +[1107.802507, "o", ";"] +[1108.129744, "o", " "] +[1108.378208, "o", "w"] +[1108.433774, "o", "h"] +[1108.489985, "o", "i"] +[1108.641622, "o", "l"] +[1108.713754, "o", "e"] +[1108.834396, "o", " "] +[1108.945519, "o", "t"] +[1109.082068, "o", "r"] +[1109.161673, "o", "u"] +[1109.601342, "o", "e"] +[1110.130521, "o", ";"] +[1110.937837, "o", " "] +[1111.081655, "o", "d"] +[1111.170074, "o", "o"] +[1113.96216, "o", " "] +[1115.225582, "o", "i"] +[1115.635039, "o", "="] +[1115.993851, "o", "$"] +[1116.177231, "o", "["] +[1116.512499, "o", "i"] +[1116.955475, "o", "\b\u001b[16;1H\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[15;41H\u001b[K"] +[1117.241465, "o", "$"] +[1117.490132, "o", "i"] +[1118.170026, "o", "+"] +[1118.473776, "o", "1"] +[1118.641428, "o", "]"] +[1120.374762, "o", ";"] +[1120.986189, "o", " "] +[1122.00169, "o", "d"] +[1122.042127, "o", "o"] +[1122.224768, "o", "n"] +[1122.362033, "o", "e"] +[1123.097725, "o", " "] +[1124.041576, "o", "&"] +[1124.842453, "o", "\r\n"] +[1124.846558, "o", "root@qemux86:~# "] +[1125.945623, "o", "t"] +[1126.06538, "o", "o"] +[1126.153341, "o", "p"] +[1126.819453, "o", "\r\n"] +[1127.026599, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[KMem: 30712K used, 209968K free, 172K 
shrd, 368K buff, 4344K cached"] +[1127.026754, "o", "\r\n"] +[1127.026857, "o", "CPU: 86% usr 13% sys 0% nic 0% idle 0% io 0% irq 0% sirq"] +[1127.027083, "o", "\r\n"] +[1127.027772, "o", "Load average: 0.28 0.65 0.45 2/38 348"] +[1127.027961, "o", "\r\n"] +[1127.044934, "o", "\u001b[0m\u001b(B\u001b[7m PID PPID USER STAT VSZ %VSZ %CPU COMMAND"] +[1127.045363, "o", "\r\n"] +[1127.050449, "o", "\u001b[0m\u001b(B 347 345 root R 2972 1% 80% -sh"] +[1127.050668, "o", "\r\n"] +[1127.051028, "o", " 348 345 root R 2828 1% 7% top"] +[1127.051127, "o", "\r\n"] +[1127.052058, "o", " 10 2 root IW 0 0% 7% [rcu_sched]"] +[1127.052239, "o", "\r\n"] +[1127.05275, "o", " 345 1 root S 2972 1% 0% -sh"] +[1127.052867, "o", "\r\n"] +[1127.057884, "o", " 198 1 root S 2828 1% 0% /sbin/syslogd -n -O /var/log/messages"] +[1127.058054, "o", "\r\n"] +[1127.058664, "o", " 201 1 root S 2828 1% 0% /sbin/klogd -n"] +[1127.058723, "o", "\r\n"] +[1127.059444, "o", " 207 1 root S 2828 1% 0% /sbin/getty 38400 tty1"] +[1127.059553, "o", "\r\n"] +[1127.060102, "o", " 209 1 root S 2828 1% 0% /sbin/getty 38400 tty2"] +[1127.060272, "o", "\r\n"] +[1127.06083, "o", " 210 1 root S 2828 1% 0% /sbin/getty 38400 tty3"] +[1127.060931, "o", "\r\n"] +[1127.066093, "o", " 211 1 root S 2828 1% 0% /sbin/getty 38400 tty4"] +[1127.066277, "o", "\r\n"] +[1127.066919, "o", " 212 1 root S 2828 1% 0% /sbin/getty 38400 tty5"] +[1127.067167, "o", "\r\n"] +[1127.067611, "o", " 187 1 root S 2828 1% 0% udhcpc -R -b -p /var/run/udhcpc.eth0.p\u001b[16;80H"] +[1127.067744, "o", "\r\n"] +[1127.068254, "o", " 1 0 root S 2004 1% 0% init [5]"] +[1127.068396, "o", "\r\n"] +[1127.06884, "o", " 42 2 root SWN 0 0% 0% [kmemleak]"] +[1127.068996, "o", "\r\n"] +[1127.074239, "o", " 9 2 root SW 0 0% 0% [ksoftirqd/0]"] +[1127.074463, "o", "\r\n"] +[1127.074935, "o", " 39 2 root IW 0 0% 0% [kworker/0:2-eve]"] +[1127.07527, "o", "\r\n"] +[1127.07566, "o", " 13 2 root SW 0 0% 0% [kdevtmpfs]"] +[1127.075843, "o", "\r\n"] +[1127.076346, "o", " 
38 2 root IW 0 0% 0% [kworker/u2:1-ev]"] +[1127.076463, "o", "\r\n"] +[1127.07694, "o", " 34 2 root IW< 0 0% 0% [kworker/0:1H-kb]"] +[1127.077152, "o", "\r\n"] +[1127.081642, "o", " 43 2 root SW 0 0% 0% [jbd2/vda-8]\r"] +[1132.126436, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[KMem: 30772K used, 209908K free, 172K shrd, 368K buff, 4344K cached"] +[1132.126724, "o", "\r\n"] +[1132.126816, "o", "CPU: 97% usr 2% sys 0% nic 0% idle 0% io 0% irq 0% sirq"] +[1132.12694, "o", "\r\n"] +[1132.127521, "o", "Load average: 0.34 0.66 0.46 2/38 348"] +[1132.127666, "o", "\r\n"] +[1132.128837, "o", "\u001b[0m\u001b(B\u001b[7m PID PPID USER STAT VSZ %VSZ %CPU COMMAND"] +[1132.129009, "o", "\r\n"] +[1132.134166, "o", "\u001b[0m\u001b(B 347 345 root R 2972 1% 99% -sh"] +[1132.134454, "o", "\r\n"] +[1132.135025, "o", " 348 345 root R 2972 1% 1% top"] +[1132.135336, "o", "\r\n"] +[1132.135943, "o", " 345 1 root S 2972 1% 0% -sh"] +[1132.13614, "o", "\r\n"] +[1132.13659, "o", " 198 1 root S 2828 1% 0% /sbin/syslogd -n -O /var/log/messages"] +[1132.136798, "o", "\r\n"] +[1132.141677, "o", " 201 1 root S 2828 1% 0% /sbin/klogd -n"] +[1132.141859, "o", "\r\n"] +[1132.142517, "o", " 207 1 root S 2828 1% 0% /sbin/getty 38400 tty1"] +[1132.142783, "o", "\r\n"] +[1132.143294, "o", " 209 1 root S 2828 1% 0% /sbin/getty 38400 tty2"] +[1132.14353, "o", "\r\n"] +[1132.143957, "o", " 210 1 root S 2828 1% 0% /sbin/getty 38400 tty3"] +[1132.144265, "o", "\r\n"] +[1132.1447, "o", " 211 1 root S 2828 1% 0% /sbin/getty 38400 tty4"] +[1132.14492, "o", "\r\n"] +[1132.150219, "o", " 212 1 root S 2828 1% 0% /sbin/getty 38400 tty5"] +[1132.150349, "o", "\r\n"] +[1132.150878, "o", " 187 1 root S 2828 1% 0% udhcpc -R -b -p /var/run/udhcpc.eth0.p\u001b[15;80H"] +[1132.151051, "o", "\r\n"] 
+[1132.151431, "o", " 1 0 root S 2004 1% 0% init [5]"] +[1132.151581, "o", "\r\n"] +[1132.152171, "o", " 42 2 root SWN 0 0% 0% [kmemleak]"] +[1132.152327, "o", "\r\n"] +[1132.152911, "o", " 9 2 root SW 0 0% 0% [ksoftirqd/0]"] +[1132.153024, "o", "\r\n"] +[1132.157768, "o", " 10 2 root IW 0 0% 0% [rcu_sched]"] +[1132.157995, "o", "\r\n"] +[1132.158368, "o", " 39 2 root IW 0 0% 0% [kworker/0:2-eve]"] +[1132.158645, "o", "\r\n"] +[1132.159109, "o", " 13 2 root SW 0 0% 0% [kdevtmpfs]"] +[1132.159346, "o", "\r\n"] +[1132.159794, "o", " 38 2 root IW 0 0% 0% [kworker/u2:1-ev]"] +[1132.159838, "o", "\r\n"] +[1132.160264, "o", " 34 2 root IW< 0 0% 0% [kworker/0:1H-kb]"] +[1132.16044, "o", "\r\n"] +[1132.160661, "o", " 43 2 root SW 0 0% 0% [jbd2/vda-8]\r"] +[1133.586006, "o", "\n\u001b[23;80H \u001b[24;1H"] +[1133.589361, "o", "root@qemux86:~# "] +[1135.56963, "o", "#"] +[1135.761792, "o", " "] +[1136.825768, "o", "o"] +[1136.897392, "o", "k"] +[1137.009583, "o", " "] +[1137.185503, "o", "t"] +[1137.313332, "o", "h"] +[1137.433542, "o", "e"] +[1137.538696, "o", " "] +[1137.849713, "o", "C"] +[1137.905725, "o", "P"] +[1138.081863, "o", "U"] +[1138.649702, "o", " "] +[1138.833525, "o", "i"] +[1138.937694, "o", "s"] +[1139.001327, "o", " "] +[1139.201449, "o", "m"] +[1139.26509, "o", "o"] +[1139.417946, "o", "s"] +[1139.697189, "o", "t"] +[1139.785468, "o", "ly"] +[1140.337652, "o", " "] +[1140.521323, "o", "ru"] +[1140.577507, "o", "nn"] +[1140.785505, "o", "in"] +[1140.865534, "o", "g"] +[1141.073726, "o", " "] +[1142.521937, "o", "u"] +[1142.625315, "o", "s"] +[1142.681312, "o", "e"] +[1142.746069, "o", "r"] +[1142.89747, "o", "s"] +[1143.000997, "o", "p"] +[1144.048991, "o", "a"] +[1144.46552, "o", "c"] +[1144.569368, "o", "e"] +[1144.713469, "o", " "] +[1145.721142, "o", "c"] +[1145.849051, "o", "o"] +[1145.937576, "o", "d"] +[1146.129514, "o", "e"] +[1149.698062, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[1149.699006, "o", "root@qemux86:~# "] +[1150.777065, "o", "#"] 
+[1151.009906, "o", " "] +[1151.217704, "o", "l"] +[1151.289227, "o", "e"] +[1151.48157, "o", "t"] +[1151.689748, "o", "s"] +[1151.817419, "o", " "] +[1152.13751, "o", "g"] +[1152.209163, "o", "o"] +[1152.345232, "o", " "] +[1152.921542, "o", "b"] +[1152.985042, "o", "a"] +[1153.12187, "o", "c"] +[1153.225154, "o", "k"] +[1153.368917, "o", " "] +[1153.521092, "o", "t"] +[1153.59312, "o", "o"] +[1153.673022, "o", " "] +[1153.75302, "o", "t"] +[1153.881685, "o", "h"] +[1153.929562, "o", "e"] +[1154.025017, "o", " "] +[1154.154048, "o", "d"] +[1154.193966, "o", "e"] +[1154.296968, "o", "b"] +[1154.353443, "o", "u"] +[1154.513136, "o", "g"] +[1154.665637, "o", "g"] +[1154.74519, "o", "e"] +[1154.841208, "o", "r"] +[1157.898001, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[1157.898871, "o", "root@qemux86:~# "] +[1159.312239, "o", "\u001b[0m\u001b(B\u001b[7m\r\u001b[K\u001b[?12l\u001b[?25h\u001b[?25lCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7.1 | VT102 | Offline | al.pts\u001b[?12l\u001b[?25h\u001b[24;17H"] +[1159.680333, "o", "\u001b[8;30H\u001b[?25l\u001b[0m\u001b(B\u001b(0lqqqqqqqqqqqqqqqqqqqqqqk\u001b[9;30Hx\u001b[0m\u001b(B Leave Minicom? 
\u001b[0m\u001b(B\u001b(0x\u001b[10;30Hx\u001b[0m\u001b(B No \u001b[0m\u001b(B\u001b(0x\u001b[11;30Hmqqqqqqqqqqqqqqqqqqqqqqj\u001b[10;51H\u001b[?25l\u001b[10;33H\u001b[0m\u001b(B\u001b[7m Yes "] +[1160.319825, "o", "\u001b[?12l\u001b[?25h\u001b[8;1H\u001b[0m\u001b(B 209 1 root S 2828 1% 0% /sbin/getty\u001b[9;1H 210 1 root S 2828 1% 0% /sbin/getty\u001b[10;1H 211 1 root S 2828 1% 0% /sbin/getty\u001b[11;1H 212 1 root S 2828 1% 0% /sbin/getty\u001b[24;17H\u001b[0m\u001b(B\u001b[7m\u001b[?12l\u001b[?25h"] +[1160.320008, "o", "\u001b[?12l\u001b[?25h\u001b[0m\u001b(B\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1l\u001b>\u001b[!p\u001b[?3;4l\u001b[4l\u001b>"] +[1160.32013, "o", "$ "] +[1161.688843, "o", "m"] +[1161.800345, "o", "a"] +[1161.856099, "o", "k"] +[1162.008302, "o", "e"] +[1162.072387, "o", " "] +[1162.328258, "o", "g"] +[1162.408339, "o", "d"] +[1162.488446, "o", "b"] +[1162.904659, "o", "\r\n"] +[1162.910043, "o", "gdb -ex \"target remote localhost:1234\" /linux/vmlinux\r\n"] +[1162.944541, "o", "\u001b[35;1m\u001b[35;1mGNU gdb \u001b[m\u001b[35;1m(Ubuntu 9.2-0ubuntu1~20.04) \u001b[m\u001b[35;1m9.2\u001b[m\u001b[35;1m\r\n\u001b[m\u001b[mCopyright (C) 2020 Free Software Foundation, Inc.\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\nType \"show copying\" and \"show warranty\" for details.\r\nThis GDB was configured as \"x86_64-linux-gnu\".\r\nType \"show configuration\" for configuration details.\r\nFor bug reporting instructions, please see:\r\n.\r\nFind the GDB manual and other documentation resources online at:\r\n .\r\n\r\n"] +[1162.944647, "o", "For help, type \"help\".\r\nType \"apropos word\" to search for commands related to \"word\"...\r\n"] +[1162.945021, "o", "Reading symbols from \u001b[32m/linux/vmlinux\u001b[m...\r\n"] +[1163.591264, "o", "Remote debugging using localhost:1234\r\n"] +[1163.599772, "o", 
"\u001b[34m0x448ac101\u001b[m in \u001b[33m??\u001b[m ()\r\n"] +[1163.600288, "o", "(gdb) "] +[1167.808759, "o", "#"] +[1168.054699, "o", " "] +[1174.704625, "o", "u"] +[1174.784712, "o", "s"] +[1174.864648, "o", "e"] +[1174.944398, "o", "r"] +[1175.057207, "o", "s"] +[1175.176743, "o", "p"] +[1175.225215, "o", "a"] +[1175.345433, "o", "c"] +[1175.433465, "o", "e"] +[1175.601394, "o", " "] +[1175.744383, "o", "a"] +[1175.920469, "o", "d"] +[1176.072352, "o", "d"] +[1176.321026, "o", "r"] +[1176.656814, "o", "e"] +[1176.832588, "o", "s"] +[1176.960611, "o", "s"] +[1177.072884, "o", ","] +[1177.168436, "o", " "] +[1177.329116, "o", "g"] +[1177.404147, "o", "o"] +[1177.448381, "o", "o"] +[1177.598144, "o", "d"] +[1177.662444, "o", "!"] +[1180.24054, "o", "\r\n"] +[1180.24062, "o", "(gdb) "] +[1181.720922, "o", "b"] +[1181.80043, "o", "r"] +[1181.86486, "o", "e"] +[1181.929251, "o", "a"] +[1182.024706, "o", "k"] +[1182.105515, "o", " "] +[1195.76893, "o", "h"] +[1195.849596, "o", "a"] +[1195.960904, "o", "n"] +[1196.088828, "o", "d"] +[1196.176629, "o", "l"] +[1196.496808, "o", "e"] +[1196.672653, "o", "_"] +[1197.256752, "o", "e"] +[1197.416356, "o", "x"] +[1197.656574, "o", "c"] +[1197.720445, "o", "e"] +[1197.793172, "o", "p"] +[1197.945795, "o", "t"] +[1198.018687, "o", "i"] +[1198.052227, "o", "o"] +[1198.226222, "o", "n"] +[1198.528674, "o", "\r\n"] +[1198.579682, "o", "Breakpoint 1 at \u001b[34m0xc15dea1f\u001b[m: file \u001b[32march/x86/entry/entry_32.S\u001b[m, line 1154.\r\n"] +[1198.57999, "o", "(gdb) "] +[1200.261062, "o", "c"] +[1200.433433, "o", "\r\nContinuing.\r\n"] +[1200.436772, "o", "\r\n"] +[1200.437049, "o", "Breakpoint 1, \u001b[33mhandle_exception\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1154\r\n1154\t\tSAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1\r\n"] +[1200.437198, "o", "(gdb) "] +[1228.112751, "o", "b"] +[1228.201197, "o", "t"] +[1228.329203, "o", "\r\n"] +[1228.331815, "o", "#0 \u001b[33mhandle_exception\u001b[m () at 
\u001b[32march/x86/entry/entry_32.S\u001b[m:1154\r\n"] +[1228.332762, "o", "#1 \u001b[34m0xc15d3840\u001b[m in \u001b[33m??\u001b[m () at \u001b[32march/x86/kernel/setup.c\u001b[m:755\r\nBacktrace stopped: previous frame inner to this frame (corrupt stack?)\r\n(gdb) "] +[1239.893902, "o", "#"] +[1240.072314, "o", " "] +[1240.184169, "o", "l"] +[1240.272133, "o", "e"] +[1240.416193, "o", "t"] +[1240.603099, "o", "s"] +[1240.678107, "o", " "] +[1243.392803, "o", "l"] +[1243.592484, "o", "o"] +[1243.744272, "o", "o"] +[1243.856711, "o", "k"] +[1244.075404, "o", " "] +[1244.232286, "o", "a"] +[1244.712632, "o", "t"] +[1244.792148, "o", " "] +[1244.904252, "o", "t"] +[1244.992334, "o", "h"] +[1245.091434, "o", "e"] +[1245.173492, "o", " "] +[1245.336103, "o", "s"] +[1245.497425, "o", "t"] +[1245.54492, "o", "a"] +[1245.720423, "o", "c"] +[1245.784262, "o", "k"] +[1245.872308, "o", " "] +[1245.984224, "o", "a"] +[1246.272808, "o", "a"] +[1246.688388, "o", "\b\u001b[K"] +[1246.744332, "o", "g"] +[1246.816332, "o", "a"] +[1246.968488, "o", "i"] +[1247.040167, "o", "n"] +[1247.809286, "o", "\r\n"] +[1247.809419, "o", "(gdb) "] +[1248.16016, "o", "#"] +[1248.32863, "o", " "] +[1248.496948, "o", "t"] +[1248.608703, "o", "h"] +[1248.751015, "o", "e"] +[1249.152273, "o", " "] +[1249.304321, "o", "f"] +[1249.401613, "o", "i"] +[1249.560984, "o", "r"] +[1249.933296, "o", "s"] +[1250.168173, "o", "t"] +[1250.449313, "o", " "] +[1250.767425, "o", "i"] +[1250.969452, "o", "t"] +[1251.048783, "o", "e"] +[1251.160335, "o", "m"] +[1251.328476, "o", " "] +[1251.528696, "o", "o"] +[1251.744914, "o", "n"] +[1251.845411, "o", " "] +[1251.96225, "o", "t"] +[1252.081386, "o", "e"] +[1252.096296, "o", "h"] +[1252.25686, "o", " "] +[1252.376629, "o", "s"] +[1252.504275, "o", "t"] +[1252.560027, "o", "a"] +[1252.824826, "o", "\b\u001b[K"] +[1252.968895, "o", "\b\u001b[K"] +[1253.112952, "o", "\b\u001b[K"] +[1253.248719, "o", "\b\u001b[K"] +[1253.368406, "o", "\b\u001b[K"] +[1253.576566, "o", 
"\b\u001b[K"] +[1253.769049, "o", "h"] +[1253.848377, "o", "e"] +[1253.928207, "o", " "] +[1254.07566, "o", "s"] +[1254.224132, "o", "t"] +[1254.256299, "o", "a"] +[1254.46369, "o", "c"] +[1254.528393, "o", "k"] +[1254.62412, "o", " "] +[1254.792238, "o", "s"] +[1254.896183, "o", "h"] +[1254.976616, "o", "o"] +[1255.095813, "o", "u"] +[1255.233173, "o", "l"] +[1255.328849, "o", "d"] +[1255.422248, "o", " "] +[1255.640996, "o", "b"] +[1255.712744, "o", "e"] +[1255.832174, "o", " "] +[1256.211236, "o", "t"] +[1256.280666, "o", "h"] +[1256.400446, "o", "e"] +[1256.487918, "o", " "] +[1258.854607, "o", "t"] +[1258.941545, "o", "i"] +[1259.024791, "o", "m"] +[1259.04853, "o", "e"] +[1259.121157, "o", "r"] +[1259.224098, "o", " "] +[1259.360266, "o", "h"] +[1259.408186, "o", "a"] +[1259.552178, "o", "n"] +[1259.616451, "o", "d"] +[1259.762455, "o", "l"] +[1259.841476, "o", "e"] +[1259.913474, "o", "r"] +[1260.720512, "o", "\r\n(gdb) "] +[1261.704102, "o", "p"] +[1261.847725, "o", "r"] +[1261.936837, "o", "i"] +[1261.984576, "o", "n"] +[1262.066128, "o", "t"] +[1262.145208, "o", " "] +[1262.424767, "o", "("] +[1262.624087, "o", "v"] +[1262.737455, "o", "o"] +[1263.000827, "o", "d"] +[1263.399755, "o", "\b\u001b[K"] +[1263.544234, "o", "i"] +[1263.648825, "o", "d"] +[1263.792709, "o", " "] +[1264.222987, "o", "*"] +[1264.367238, "o", ")"] +[1264.648392, "o", "("] +[1267.639225, "o", "*"] +[1267.776229, "o", "("] +[1269.23218, "o", "u"] +[1269.400337, "o", "i"] +[1269.448756, "o", "n"] +[1269.579068, "o", "t"] +[1269.800118, "o", "3"] +[1269.872245, "o", "2"] +[1270.048002, "o", "_"] +[1270.200422, "o", "t"] +[1270.579908, "o", "*"] +[1270.696635, "o", ")"] +[1271.552099, "o", "$"] +[1271.941288, "o", "e"] +[1272.232936, "o", "p"] +[1272.416525, "o", "s"] +[1273.783112, "o", "\b\u001b[K"] +[1273.87923, "o", "\b\u001b[K"] +[1273.942963, "o", "s"] +[1274.127877, "o", "p"] +[1274.688937, "o", ")"] +[1277.832273, "o", "\r\n"] +[1277.834178, "o", "$1 = (void *) 
\u001b[34m0xc15d3840\u001b[m <\u001b[33msysvec_apic_timer_interrupt\u001b[m>\r\n(gdb) "] +[1280.216526, "o", "#"] +[1280.440317, "o", " "] +[1280.905338, "o", "n"] +[1281.006222, "o", "e"] +[1281.096051, "o", "x"] +[1281.352079, "o", "t"] +[1281.45623, "o", " "] +[1281.512205, "o", "w"] +[1281.592294, "o", "e"] +[1281.728646, "o", " "] +[1281.887957, "o", "s"] +[1282.014283, "o", "h"] +[1282.096593, "o", "o"] +[1282.208553, "o", "u"] +[1282.336615, "o", "l"] +[1282.448312, "o", "d"] +[1282.49654, "o", " "] +[1282.784631, "o", "h"] +[1282.889574, "o", "a"] +[1283.030069, "o", "v"] +[1283.096451, "o", "e"] +[1283.14104, "o", " "] +[1283.336121, "o", " "] +[1283.640365, "o", "0"] +[1284.105053, "o", "\b\u001b[K"] +[1284.232359, "o", "\b\u001b[K"] +[1284.456187, "o", "0"] +[1286.217444, "o", ","] +[1286.428228, "o", " "] +[1286.634031, "o", "t"] +[1286.725784, "o", "h"] +[1286.864535, "o", "e"] +[1286.968074, "o", "n"] +[1287.047946, "o", " "] +[1287.143945, "o", "t"] +[1287.24016, "o", "h"] +[1287.312374, "o", "e"] +[1287.434149, "o", " "] +[1287.648797, "o", "o"] +[1287.853749, "o", "l"] +[1287.93696, "o", "d"] +[1288.056131, "o", " "] +[1288.917063, "o", "E"] +[1289.135849, "o", "I"] +[1289.231925, "o", "P"] +[1291.056219, "o", "\r\n(gdb) "] +[1292.1377, "o", "p"] +[1292.411602, "o", "r"] +[1292.528097, "o", "i"] +[1292.599943, "o", "n"] +[1293.344827, "o", "\b\b\b\b# next we should have 0, then the old EIP"] +[1294.000313, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[9Pprint (void *)(*(uint32_t*)$esp)"] +[1294.377399, "o", "\b"] +[1294.664387, "o", "\b"] +[1294.842701, "o", "\b"] +[1294.944346, "o", "\b"] +[1295.296547, "o", "\b"] +[1295.886427, "o", "($esp)\b\b\b\b\b"] +[1296.416543, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[1296.608222, "o", "\b"] +[1297.026087, "o", "+)\b\b\u001b[1P)\b"] +[1297.560139, "o", "+)\b"] +[1297.916844, "o", "4)\b"] +[1298.397975, "o", "\u001b[C)\b"] +[1299.632357, "o", "\r\n"] +[1299.633204, "o", "$2 = (void 
*) \u001b[34m0x0\u001b[m\r\n(gdb) "] +[1300.373248, "o", "print (void *)(*(uint32_t*)($esp+4))"] +[1300.688192, "o", "\b"] +[1300.856538, "o", "\b"] +[1301.000194, "o", "\b"] +[1301.168188, "o", "\u001b[1P))\b\b"] +[1301.952667, "o", "8))\b\b"] +[1302.457015, "o", "\r\n"] +[1302.458153, "o", "$3 = (void *) \u001b[34m0x448abff0\u001b[m\r\n(gdb) "] +[1303.41604, "o", "#"] +[1303.712934, "o", " "] +[1304.968705, "o", "t"] +[1305.080618, "o", "h"] +[1305.112913, "o", "i"] +[1305.320332, "o", "s"] +[1305.450583, "o", " "] +[1305.784044, "o", "c"] +[1305.879818, "o", "e"] +[1306.01577, "o", "r"] +[1306.215986, "o", "t"] +[1306.408455, "o", "a"] +[1306.509271, "o", "i"] +[1306.568281, "o", "n"] +[1306.736051, "o", "l"] +[1306.968107, "o", "y"] +[1307.032157, "o", " "] +[1307.240792, "o", "l"] +[1307.392111, "o", "o"] +[1307.523872, "o", "o"] +[1307.56085, "o", "k"] +[1307.704084, "o", "s"] +[1307.760884, "o", " "] +[1308.464805, "o", "l"] +[1308.632045, "o", "i"] +[1308.784287, "o", "k"] +[1308.847688, "o", "e"] +[1308.920087, "o", " "] +[1309.088096, "o", "a"] +[1309.16807, "o", " "] +[1310.540839, "o", "u"] +[1310.625164, "o", "s"] +[1310.681276, "o", "e"] +[1310.7603, "o", "r"] +[1310.887811, "o", "s"] +[1310.960368, "o", "p"] +[1311.072017, "o", "a"] +[1311.145256, "o", "c"] +[1311.223973, "o", "e"] +[1311.368353, "o", " "] +[1311.457182, "o", "a"] +[1311.601448, "o", "d"] +[1311.769504, "o", "d"] +[1311.968497, "o", "r"] +[1312.000587, "o", "e"] +[1312.160017, "o", "s"] +[1312.296067, "o", "s"] +[1312.672663, "o", "\r\n(gdb) "] +[1313.104304, "o", "#"] +[1313.247936, "o", " "] +[1313.591981, "o", "s"] +[1313.663257, "o", "o"] +[1313.7933, "o", " "] +[1314.376222, "o", "a"] +[1314.488137, "o", " "] +[1315.711982, "o", "p"] +[1315.873232, "o", "r"] +[1315.976308, "o", "i"] +[1316.097279, "o", "v"] +[1316.192412, "o", "i"] +[1317.808159, "o", "l"] +[1317.888621, "o", "e"] +[1318.806197, "o", "g"] +[1318.880036, "o", "e"] +[1319.007839, "o", " "] +[1320.044197, "o", "t"] 
+[1320.232041, "o", "r"] +[1320.37594, "o", "a"] +[1320.790313, "o", "n"] +[1320.960849, "o", "s"] +[1321.043584, "o", "i"] +[1321.228877, "o", "t"] +[1321.2893, "o", "i"] +[1321.328086, "o", "o"] +[1321.488163, "o", "n"] +[1321.560022, "o", " "] +[1321.743979, "o", "h"] +[1321.784015, "o", "a"] +[1321.883377, "o", "s"] +[1321.978322, "o", " "] +[1322.432097, "o", "h"] +[1322.504039, "o", "a"] +[1322.64008, "o", "p"] +[1322.751806, "o", "p"] +[1322.840014, "o", "e"] +[1322.952267, "o", "n"] +[1323.065126, "o", "e"] +[1323.222808, "o", "d"] +[1324.320102, "o", "\r\n(gdb) "] +[1328.287918, "o", "#"] +[1328.5767, "o", " "] +[1328.728319, "o", "t"] +[1328.807873, "o", "h"] +[1328.922227, "o", "e"] +[1329.031848, "o", " "] +[1329.201101, "o", "n"] +[1329.255849, "o", "e"] +[1329.3283, "o", "x"] +[1329.632289, "o", "t"] +[1329.720295, "o", " "] +[1329.85636, "o", "t"] +[1330.070787, "o", "w"] +[1330.180591, "o", "o"] +[1330.327382, "o", " "] +[1331.600174, "o", "v"] +[1331.688922, "o", "a"] +[1331.800273, "o", "l"] +[1331.967863, "o", "u"] +[1332.879624, "o", "e"] +[1333.008092, "o", "s"] +[1333.129677, "o", " "] +[1333.260857, "o", "o"] +[1333.408013, "o", "n"] +[1333.491288, "o", " "] +[1333.585346, "o", "t"] +[1333.67251, "o", "h"] +[1333.744198, "o", "e"] +[1333.824197, "o", " "] +[1333.927752, "o", "s"] +[1334.023852, "o", "t"] +[1334.088274, "o", "a"] +[1334.278186, "o", "c"] +[1334.312208, "o", "k"] +[1334.4004, "o", " "] +[1334.479627, "o", "s"] +[1334.569724, "o", "h"] +[1334.653599, "o", "o"] +[1334.761086, "o", "u"] +[1334.879876, "o", "l"] +[1334.967955, "o", "d"] +[1335.039806, "o", " "] +[1337.127967, "o", "b"] +[1337.568187, "o", "e"] +[1337.733548, "o", " "] +[1337.829755, "o", "t"] +[1337.920319, "o", "h"] +[1337.983981, "o", "e"] +[1338.096372, "o", " "] +[1338.472148, "o", "o"] +[1338.656814, "o", "l"] +[1338.768352, "o", "d"] +[1338.916423, "o", " "] +[1339.202871, "o", "C"] +[1339.327983, "o", "S"] +[1339.440216, "o", " "] +[1339.57566, "o", "a"] 
+[1339.70382, "o", "n"] +[1339.800022, "o", "d"] +[1339.878404, "o", " "] +[1342.408345, "o", "o"] +[1342.521478, "o", "d"] +[1342.569261, "o", "l"] +[1342.687708, "o", " "] +[1342.896122, "o", "E"] +[1343.07254, "o", "F"] +[1343.160126, "o", "L"] +[1343.252373, "o", "A"] +[1346.581722, "o", "G"] +[1346.645084, "o", "S"] +[1346.984099, "o", "\r\n(gdb) "] +[1349.368274, "o", "# the next two values on the stack should be the old CS and old EFLAGS"] +[1349.503807, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[30Pso a privilege transition has happened"] +[1349.640129, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cthis certainly looks like a userspace address"] +[1350.086523, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[11Pprint (void *)(*(uint32_t*)($esp+8))"] +[1350.592191, "o", "\b"] +[1350.744442, "o", "\b"] +[1350.871908, "o", "\b"] +[1351.056417, "o", "\u001b[1P))\b\b"] +[1351.150322, "o", "1))\b\b"] +[1351.246318, "o", "2))\b\b"] +[1351.576371, "o", "\r\n"] +[1351.577107, "o", "$4 = (void *) \u001b[34m0x73\u001b[m\r\n(gdb) "] +[1352.136702, "o", "print (void *)(*(uint32_t*)($esp+12))"] +[1352.288158, "o", "\b"] +[1352.424696, "o", "\b"] +[1352.593518, "o", "\b"] +[1353.016482, "o", "\u001b[1P))\b\b"] +[1353.136234, "o", "4))\b\b"] +[1360.824627, "o", "\r\n"] +[1360.825733, "o", "$5 = (void *) \u001b[34m0x2820000\u001b[m\r\n"] +[1360.825786, "o", "(gdb) "] +[1363.824749, "o", "#"] +[1363.925525, "o", " "] +[1364.090304, "o", "l"] +[1364.897579, "o", "\b\u001b[K"] +[1365.125951, "o", "y"] +[1365.207584, "o", "o"] +[1365.719749, "o", "\b\u001b[K"] +[1365.840063, "o", "\b\u001b[K"] +[1366.048291, "o", "n"] +[1366.112558, "o", "o"] +[1366.280917, "o", "t"] +[1366.405468, "o", "i"] +[1366.600048, "o", "c"] +[1366.688182, "o", "e"] +[1366.799955, "o", " "] +[1366.953361, "o", "t"] +[1367.071836, "o", "h"] +[1367.188867, "o", "a"] +[1367.287932, "o", "t"] +[1367.379658, "o", " "] +[1367.521389, 
"o", "t"] +[1367.607602, "o", "h"] +[1367.695693, "o", "e"] +[1367.815874, "o", " "] +[1373.74383, "o", "o"] +[1373.896155, "o", "l"] +[1374.000406, "o", "d"] +[1374.05883, "o", " "] +[1374.488016, "o", "C"] +[1374.560064, "o", "S"] +[1374.70417, "o", " "] +[1374.927765, "o", "i"] +[1375.048476, "o", "s"] +[1375.159989, "o", " "] +[1375.523958, "o", "d"] +[1375.672027, "o", "i"] +[1375.823809, "o", "f"] +[1376.352436, "o", "f"] +[1376.488975, "o", "e"] +[1376.653416, "o", "r"] +[1376.712524, "o", "e"] +[1376.847597, "o", "n"] +[1377.00774, "o", "t"] +[1377.337116, "o", " "] +[1380.176217, "o", "\r\n"] +[1380.17633, "o", "(gdb) "] +[1381.519619, "o", "p"] +[1381.727674, "o", "r"] +[1381.807962, "o", "i"] +[1381.847678, "o", "n"] +[1381.992661, "o", "t"] +[1382.213413, "o", " "] +[1383.487682, "o", "/"] +[1383.720038, "o", " "] +[1384.284125, "o", "$"] +[1384.785109, "o", "\b\u001b[K"] +[1384.896215, "o", "\b\u001b[K"] +[1384.992093, "o", "x"] +[1385.119757, "o", " "] +[1385.462147, "o", "$"] +[1385.975882, "o", "c"] +[1386.039807, "o", "s"] +[1387.288752, "o", "\r\n"] +[1387.288917, "o", "$6 = 0x60\r\n(gdb) "] +[1392.071906, "o", "#"] +[1392.183952, "o", " "] +[1392.530314, "o", "s"] +[1392.64009, "o", "o"] +[1392.760001, "o", " "] +[1392.935708, "o", "w"] +[1393.023798, "o", "e"] +[1393.143801, "o", " "] +[1394.480397, "o", "s"] +[1394.552322, "o", "h"] +[1394.616216, "o", "o"] +[1394.741538, "o", "u"] +[1394.8873, "o", "l"] +[1394.991717, "o", "d"] +[1395.119869, "o", " "] +[1395.764404, "o", "h"] +[1395.81421, "o", "a"] +[1395.96834, "o", "v"] +[1396.047828, "o", "e"] +[1396.087582, "o", " "] +[1396.24761, "o", "t"] +[1396.51192, "o", "w"] +[1396.608042, "o", "o"] +[1396.706285, "o", " "] +[1396.859207, "o", "e"] +[1396.952619, "o", "x"] +[1397.184817, "o", "t"] +[1397.343837, "o", "r"] +[1397.408411, "o", "a"] +[1397.599725, "o", " "] +[1400.600069, "o", "v"] +[1400.655484, "o", "a"] +[1400.767453, "o", "l"] +[1400.91153, "o", "u"] +[1400.983774, "o", "e"] 
+[1401.175892, "o", "s"] +[1401.208238, "o", " "] +[1401.319824, "o", "o"] +[1401.502175, "o", "n"] +[1401.582375, "o", " "] +[1401.63978, "o", "t"] +[1401.768209, "o", "h"] +[1401.823535, "o", "e"] +[1401.888547, "o", " "] +[1402.096021, "o", "s"] +[1402.263991, "o", "t"] +[1402.335885, "o", "a"] +[1403.287574, "o", "c"] +[1403.423212, "o", "k"] +[1405.773719, "o", ":"] +[1405.903731, "o", " "] +[1406.199721, "o", "o"] +[1406.368541, "o", "l"] +[1406.416058, "o", "d"] +[1406.503703, "o", " "] +[1406.701576, "o", "E"] +[1406.904167, "o", "S"] +[1407.007965, "o", "P"] +[1407.240691, "o", " "] +[1407.39138, "o", "a"] +[1407.527755, "o", "n"] +[1407.608416, "o", "d"] +[1407.695859, "o", " "] +[1407.807966, "o", "o"] +[1407.976125, "o", "l"] +[1408.048128, "o", "d"] +[1408.141001, "o", " "] +[1408.353815, "o", "S"] +[1408.495468, "o", "S"] +[1410.367923, "o", "\r\n(gdb) "] +[1969.053941, "o", "# we should have two extra values on stack: old ESP and old SS"] +[1969.262191, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cprint /x $cs\u001b[K"] +[1969.635988, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C# notice that CS is different "] +[1970.061803, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cprint (void *)(*(uint32_t*)($esp+16))"] +[1970.590698, "o", "\b"] +[1970.742563, "o", "\b"] +[1970.877824, "o", "\b"] +[1971.518067, "o", "\b"] +[1971.789987, "o", "\u001b[1P6))\b\b\b"] +[1971.925709, "o", "\u001b[1P))\b\b"] +[1972.062369, "o", "2))\b\b"] +[1972.414001, "o", "0))\b\b"] +[1975.022556, "o", "\r\n"] +[1975.023378, "o", "$10 = (void *) \u001b[34m0xbfe27990\u001b[m\r\n(gdb) "] +[1976.069818, "o", "print (void *)(*(uint32_t*)($esp+20))"] +[1976.403385, "o", "\b"] +[1976.549892, "o", "\b"] +[1976.709579, "o", "\b"] +[1977.038119, "o", "\b"] +[1979.18221, "o", "\u001b[1P0))\b\b\b"] +[1979.334363, "o", "\u001b[1P))\b\b"] +[1979.446076, "o", "2))\b\b"] +[1979.598023, "o", "4))\b\b"] +[1981.981611, "o", "\r\n"] +[1981.982297, "o", "$11 = (void *) 
\u001b[34m0x7b\u001b[m\r\n(gdb) "] +[1987.957905, "o", "#"] +[1988.1735, "o", " "] +[1988.405662, "o", "n"] +[1988.453735, "o", "o"] +[1988.565807, "o", "t"] +[1988.725502, "o", "i"] +[1988.858572, "o", "c"] +[1988.926557, "o", "e"] +[1989.043672, "o", " "] +[1989.14232, "o", "t"] +[1989.261751, "o", "h"] +[1989.301689, "o", "a"] +[1989.405339, "o", "t"] +[1989.541661, "o", " "] +[1992.358583, "o", "S"] +[1992.501591, "o", "S"] +[1992.677657, "o", " "] +[1992.982405, "o", "i"] +[1993.088714, "o", "s"] +[1993.267478, "o", " "] +[1993.477794, "o", "a"] +[1993.582886, "o", "l"] +[1994.310019, "o", "s"] +[1994.421693, "o", "o"] +[1994.501651, "o", " "] +[1997.285443, "o", "d"] +[1997.421795, "o", "i"] +[1997.533942, "o", "f"] +[1997.678316, "o", "f"] +[1997.8058, "o", "e"] +[1997.965947, "o", "r"] +[1998.085951, "o", "e"] +[1998.357808, "o", "n"] +[1998.494273, "o", "t"] +[1998.651186, "o", " "] +[1998.827228, "o", "t"] +[1998.967293, "o", "h"] +[1999.030052, "o", "en"] +[1999.197533, "o", " "] +[1999.55771, "o", "t"] +[1999.637997, "o", "h"] +[1999.741665, "o", "e"] +[1999.849221, "o", " "] +[2000.044744, "o", "c"] +[2000.117566, "o", "u"] +[2000.278172, "o", "r"] +[2000.413666, "o", "r"] +[2000.493754, "o", "e"] +[2000.557567, "o", "n"] +[2000.715842, "o", "t"] +[2000.813749, "o", " "] +[2001.213728, "o", "s"] +[2001.357872, "o", "s"] +[2002.046006, "o", "\r\n(gdb) "] +[2002.357919, "o", "p"] +[2002.549923, "o", "r"] +[2002.613931, "o", "i"] +[2002.685486, "o", "n"] +[2002.782565, "o", "t"] +[2002.885912, "o", " "] +[2003.285786, "o", "/"] +[2003.517441, "o", "x"] +[2003.597579, "o", " "] +[2003.984994, "o", "$"] +[2005.414711, "o", "s"] +[2005.55787, "o", "s"] +[2006.605906, "o", "\r\n"] +[2006.606043, "o", "$12 = 0x68\r\n(gdb) "] +[2012.613496, "o", "#"] +[2012.765597, "o", " "] +[2012.869808, "o", "a"] +[2012.981679, "o", "n"] +[2013.045602, "o", "d"] +[2013.157599, "o", " "] +[2013.359393, "o", "t"] +[2013.420279, "o", "o"] +[2013.462073, "o", " "] +[2013.693767, 
"o", "c"] +[2013.76528, "o", "o"] +[2013.861277, "o", "n"] +[2013.910528, "o", "f"] +[2014.01357, "o", "i"] +[2014.262175, "o", "r"] +[2014.381666, "o", "m"] +[2014.558588, "o", " "] +[2014.693456, "o", "t"] +[2014.805673, "o", "h"] +[2014.93382, "o", "a"] +[2014.97489, "o", "t"] +[2015.946969, "o", " "] +[2020.233739, "o", " 0xbfe27990"] +[2020.77356, "o", " "] +[2021.302126, "o", "i"] +[2021.43803, "o", "s"] +[2021.534611, "o", " "] +[2021.725567, "o", "a"] +[2022.560398, "o", " "] +[2022.830342, "o", "u"] +[2022.933333, "o", "s"] +[2023.038594, "o", "e"] +[2023.101613, "o", "r"] +[2023.714446, "o", "s"] +[2023.826778, "o", "p"] +[2023.909568, "o", "a"] +[2023.965588, "o", "c"] +[2024.062612, "o", "e"] +[2024.141261, "o", " "] +[2024.285484, "o", "s"] +[2024.39776, "o", "t"] +[2024.454087, "o", "a"] +[2024.666852, "o", "c"] +[2024.708226, "o", "k"] +[2024.98393, "o", " "] +[2026.886057, "o", "\r\n(gdb) "] +[2027.325884, "o", "#"] +[2027.71743, "o", " "] +[2027.981779, "o", "l"] +[2028.063253, "o", "e"] +[2028.205611, "o", "t"] +[2028.317878, "o", " "] +[2028.502039, "o", "j"] +[2028.645923, "o", "u"] +[2028.813377, "o", "m"] +[2028.893359, "o", "p"] +[2028.990456, "o", " "] +[2029.172262, "o", "b"] +[2029.245318, "o", "a"] +[2029.334424, "o", "c"] +[2029.42394, "o", "k"] +[2029.510273, "o", " "] +[2029.701962, "o", "t"] +[2029.74145, "o", "o"] +[2029.861488, "o", " "] +[2029.957501, "o", "t"] +[2030.101868, "o", "h"] +[2030.166747, "o", "e"] +[2030.285553, "o", " "] +[2031.173547, "o", "t"] +[2031.253912, "o", "e"] +[2031.334197, "o", "r"] +[2031.421792, "o", "m"] +[2031.501571, "o", "i"] +[2031.621782, "o", "n"] +[2031.72392, "o", "a"] +[2031.829432, "o", "l"] +[2032.245715, "o", "\r\n(gdb) "] +[2033.431185, "o", "quit\r\n"] +[2033.431243, "o", "A debugging session is active.\r\n\r\n\tInferior 1 [process 1] will be detached.\r\n\r\nQuit anyway? 
(y or n) "] +[2034.445451, "o", "y"] +[2034.541829, "o", "\r\nDetaching from program: /linux/vmlinux, process 1\r\n"] +[2034.542671, "o", "Ending remote debugging.\r\n"] +[2034.542784, "o", "[Inferior 1 (process 1) detached]\r\n"] +[2034.548935, "o", "make: *** [qemu/Makefile:54: gdb] Interrupt\r\n"] +[2034.548997, "o", "\r\n$ "] +[2035.149297, "o", "m"] +[2035.309489, "o", "i"] +[2035.429356, "o", "n"] +[2035.629299, "o", "c"] +[2036.205576, "o", "\b \b"] +[2036.373293, "o", "i"] +[2036.669373, "o", "c"] +[2036.82129, "o", "o"] +[2036.909198, "o", "m"] +[2037.126024, "o", "d"] +[2037.50137, "o", " "] +[2037.741686, "o", "\b \b"] +[2037.861471, "o", "\b \b"] +[2037.973342, "o", " "] +[2038.069309, "o", "-"] +[2039.956556, "o", "D"] +[2041.021427, "o", " "] +[2041.348903, "o", "s"] +[2041.429401, "o", "e"] +[2041.509259, "o", "r"] +[2041.573313, "o", "i"] +[2041.685198, "o", "a"] +[2041.757143, "o", "l"] +[2041.917485, "o", "."] +[2042.108869, "o", "p"] +[2042.253212, "o", "t"] +[2042.477348, "o", "s"] +[2043.621531, "o", "\r\n"] +[2043.622396, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[0m\u001b(B\u001b[?1h\u001b=\u001b[H\u001b[2J"] +[2043.622745, "o", "\u001b[?12l\u001b[?25h\nWelcome to minicom 2.7.1\r\n\nOPTIONS: I18n \r\nCompiled on Dec 23 2019, 02:06:26.\r\nPort serial.pts, 22:50:04\r\n\nPress CTRL-A Z for help on special keys\r\n\n"] +[2044.554156, "o", "\n"] +[2044.562897, "o", "root@qemux86:~# "] +[2047.455019, "o", "t"] +[2047.542613, "o", "o"] +[2047.606554, "o", "p"] +[2047.996772, "o", "\r\n"] +[2048.379869, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[KMem: 31172K used, 209508K free, 172K shrd, 368K buff, 4344K cached"] +[2048.380063, "o", "\r\n"] +[2048.380683, "o", "CPU: 31% usr 68% sys 0% nic 0% idle 0% io 0% irq 0% sirq"] 
+[2048.380824, "o", "\r\n"] +[2048.381861, "o", "Load average: 1.12 0.81 0.53 4/40 386"] +[2048.38202, "o", "\r\n"] +[2048.456891, "o", "\u001b[0m\u001b(B\u001b[7m PID PPID USER STAT VSZ %VSZ %CPU COMMAND"] +[2048.457162, "o", "\r\n"] +[2048.457905, "o", "\u001b[0m\u001b(B 347 345 root R 3124 1% 26% -sh"] +[2048.45804, "o", "\r\n"] +[2048.472796, "o", " 1 0 root S 2004 1% 16% init [5]"] +[2048.473041, "o", "\r\n"] +[2048.473661, "o", " 374 345 root R 2828 1% 11% top"] +[2048.473774, "o", "\r\n"] +[2048.475752, "o", " 10 2 root IW 0 0% 5% [rcu_sched]"] +[2048.47595, "o", "\r\n"] +[2048.476546, "o", " 345 1 root S 2972 1% 0% -sh"] +[2048.476691, "o", "\r\n"] +[2048.477249, "o", " 198 1 root S 2828 1% 0% /sbin/syslogd -n -O /var/log/messages"] +[2048.477504, "o", "\r\n"] +[2048.477969, "o", " 201 1 root S 2828 1% 0% /sbin/klogd -n"] +[2048.47827, "o", "\r\n"] +[2048.483281, "o", " 187 1 root S 2828 1% 0% udhcpc -R -b -p /var/run/udhcpc.eth0.p\u001b[12;80H"] +[2048.483527, "o", "\r\n"] +[2048.484037, "o", " 207 1 root S 2828 1% 0% /sbin/getty 38400 tty1"] +[2048.48425, "o", "\r\n"] +[2048.484667, "o", " 209 1 root S 2828 1% 0% /sbin/getty 38400 tty2"] +[2048.484914, "o", "\r\n"] +[2048.48537, "o", " 210 1 root S 2828 1% 0% /sbin/getty 38400 tty3"] +[2048.485513, "o", "\r\n"] +[2048.486013, "o", " 211 1 root S 2828 1% 0% /sbin/getty 38400 tty4"] +[2048.48629, "o", "\r\n"] +[2048.490808, "o", " 212 1 root S 2828 1% 0% /sbin/getty 38400 tty5"] +[2048.491041, "o", "\r\n"] +[2048.491575, "o", " 42 2 root SWN 0 0% 0% [kmemleak]"] +[2048.491767, "o", "\r\n"] +[2048.492158, "o", " 9 2 root SW 0 0% 0% [ksoftirqd/0]"] +[2048.492351, "o", "\r\n"] +[2048.492729, "o", " 39 2 root IW 0 0% 0% [kworker/0:2-eve]"] +[2048.492892, "o", "\r\n"] +[2048.493419, "o", " 13 2 root SW 0 0% 0% [kdevtmpfs]"] +[2048.493507, "o", "\r\n"] +[2048.493872, "o", " 38 2 root IW 0 0% 0% [kworker/u2:1-ev]"] +[2048.494245, "o", "\r\n"] +[2048.499119, "o", " 7 2 root IW 0 0% 0% [kworker/u2:0-ev]"] 
+[2048.499359, "o", "\r\n"] +[2048.499523, "o", " 34 2 root IW< 0 0% 0% [kworker/0:1H-kb]\r"] +[2049.597664, "o", "\n\u001b[23;80H \u001b[24;1H"] +[2049.601147, "o", "root@qemux86:~# "] +[2050.846207, "o", "c"] +[2050.950473, "o", "a"] +[2051.318748, "o", "t"] +[2051.478115, "o", " "] +[2051.6701, "o", "/"] +[2051.789863, "o", "p"] +[2051.862219, "o", "r"] +[2052.118358, "o", "o"] +[2052.357989, "o", "c"] +[2052.462199, "o", "/"] +[2054.438526, "o", "3"] +[2055.37389, "o", "4"] +[2055.414188, "o", "7"] +[2055.710106, "o", "/"] +[2055.966042, "o", "m"] +[2056.030253, "o", "a"] +[2056.126617, "o", "p"] +[2056.237966, "o", "s"] +[2056.751199, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.776691, "o", "08048000-080c2000 r-xp 00000000 fe:00 669 /bin/busybox.nosuid"] +[2056.776882, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.777068, "o", "080c2000-080c3000 r--p 00079000 fe:00 669 /bin/busybox.nosuid"] +[2056.777196, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.77744, "o", "080c3000-080c4000 rw-p 0007a000 fe:00 669 /bin/busybox.nosuid"] +[2056.777507, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.777832, "o", "080c4000-080c6000 rw-p 00000000 00:00 0 "] +[2056.777966, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.784163, "o", "08572000-08593000 rw-p 00000000 00:00 0 [heap]"] +[2056.784397, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.784596, "o", "4480c000-4482e000 r-xp 00000000 fe:00 576 /lib/ld-2.25.so"] +[2056.784771, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.784985, "o", "4482e000-4482f000 r--p 00021000 fe:00 576 /lib/ld-2.25.so"] +[2056.785371, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.785477, "o", "4482f000-44830000 rw-p 00022000 fe:00 576 /lib/ld-2.25.so"] +[2056.785705, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.785842, "o", "44832000-449a9000 r-xp 00000000 fe:00 581 /lib/libc-2.25.so"] +[2056.786153, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.786344, "o", "449a9000-449ab000 r--p 00176000 fe:00 581 /lib/libc-2.25.so"] +[2056.786584, "o", 
"\r\n\u001b[23;80H \u001b[24;1H"] +[2056.786747, "o", "449ab000-449ac000 rw-p 00178000 fe:00 581 /lib/libc-2.25.so"] +[2056.786978, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.787073, "o", "449ac000-449af000 rw-p 00000000 00:00 0 "] +[2056.787189, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.787414, "o", "449b1000-44a09000 r-xp 00000000 fe:00 641 /lib/libm-2.25.so"] +[2056.787514, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.787629, "o", "44a09000-44a0a000 r--p 00057000 fe:00 641 /lib/libm-2.25.so"] +[2056.787795, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.787962, "o", "44a0a000-44a0b000 rw-p 00058000 fe:00 641 /lib/libm-2.25.so"] +[2056.788132, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.78828, "o", "b7f1e000-b7f44000 rw-p 00000000 00:00 0 "] +[2056.788532, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.788657, "o", "b7f61000-b7f77000 r-xp 00000000 fe:00 579 /lib/libnsl-2.25.so"] +[2056.788811, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.789014, "o", "b7f77000-b7f78000 r--p 00015000 fe:00 579 /lib/libnsl-2.25.so"] +[2056.789111, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.789243, "o", "b7f78000-b7f79000 rw-p 00016000 fe:00 579 /lib/libnsl-2.25.so"] +[2056.789421, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.789588, "o", "b7f79000-b7f7b000 rw-p 00000000 00:00 0 "] +[2056.789746, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.789877, "o", "b7f7b000-b7f82000 r-xp 00000000 fe:00 608 /lib/libnss_compat-2.25.so"] +[2056.790117, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.79029, "o", "b7f82000-b7f83000 ---p 00007000 fe:00 608 /lib/libnss_compat-2.25.so"] +[2056.790438, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.790597, "o", "b7f83000-b7f84000 r--p 00007000 fe:00 608 /lib/libnss_compat-2.25.so"] +[2056.790802, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.790899, "o", "b7f84000-b7f85000 rw-p 00008000 fe:00 608 /lib/libnss_compat-2.25.so"] +[2056.791055, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.791213, "o", "b7f86000-b7f88000 rw-p 00000000 00:00 0 "] 
+[2056.79138, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.791605, "o", "b7f88000-b7f8c000 r--p 00000000 00:00 0 [vvar]"] +[2056.791733, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.791833, "o", "b7f8c000-b7f8e000 r-xp 00000000 00:00 0 [vdso]"] +[2056.792086, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.792209, "o", "bfe08000-bfe29000 rw-p 00000000 00:00 0 [stack]"] +[2056.792366, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[2056.81284, "o", "root@qemux86:~# "] +[2062.022254, "o", "#"] +[2062.222725, "o", " "] +[2064.231146, "o", " "] +[2064.231511, "o", "0"] +[2064.231736, "o", "x"] +[2064.232109, "o", "b"] +[2064.232357, "o", "f"] +[2064.23262, "o", "e"] +[2064.23285, "o", "2"] +[2064.23307, "o", "7"] +[2064.233356, "o", "9"] +[2064.233643, "o", "9"] +[2064.233852, "o", "0"] +[2065.526575, "o", " "] +[2068.24684, "o", "p"] +[2068.37458, "o", "o"] +[2069.389314, "o", "i"] +[2069.477747, "o", "n"] +[2069.598037, "o", "t"] +[2069.838099, "o", "s"] +[2069.974039, "o", " "] +[2071.190877, "o", "t"] +[2071.230772, "o", "o"] +[2071.325977, "o", " "] +[2073.598171, "o", "s"] +[2074.46266, "o", "t"] +[2074.559024, "o", "a"] +[2074.798529, "o", "c"] +[2074.862792, "o", "k"] +[2076.69481, "o", " "] +[2076.942032, "o", "i"] +[2077.014286, "o", "n"] +[2077.197566, "o", "d"] +[2077.31816, "o", "e"] +[2077.485824, "o", "e"] +[2077.597694, "o", "d"] diff --git a/Documentation/teaching/res/kernel-virtmem-map.png b/Documentation/teaching/res/kernel-virtmem-map.png new file mode 100644 index 00000000000000..25ffb7a60e94de Binary files /dev/null and b/Documentation/teaching/res/kernel-virtmem-map.png differ diff --git a/Documentation/teaching/res/kernel_threads.cast b/Documentation/teaching/res/kernel_threads.cast new file mode 100644 index 00000000000000..9001f42b20f017 --- /dev/null +++ b/Documentation/teaching/res/kernel_threads.cast @@ -0,0 +1,1350 @@ +{"version": 2, "width": 80, "height": 24, "timestamp": 1615904217, "idle_time_limit": 1.0, "env": {"SHELL": null, "TERM": 
"xterm"}} +[0.002092, "o", "$ "] +[1.411094, "o", "m"] +[1.492191, "o", "a"] +[1.57441, "o", "k"] +[1.686334, "o", "e"] +[1.750754, "o", " "] +[1.951, "o", "g"] +[2.039054, "o", "d"] +[2.111579, "o", "b"] +[2.350633, "o", "\r\n"] +[2.356524, "o", "gdb -ex \"target remote localhost:1234\" /linux/vmlinux\r\n"] +[2.390298, "o", "\u001b[35;1m\u001b[35;1mGNU gdb \u001b[m\u001b[35;1m(Ubuntu 9.2-0ubuntu1~20.04) \u001b[m\u001b[35;1m9.2\u001b[m\u001b[35;1m\r\n\u001b[m\u001b[mCopyright (C) 2020 Free Software Foundation, Inc.\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\nType \"show copying\" and \"show warranty\" for details.\r\nThis GDB was configured as \"x86_64-linux-gnu\".\r\nType \"show configuration\" for configuration details.\r\nFor bug reporting instructions, please see:\r\n.\r\nFind the GDB manual and other documentation resources online at:\r\n .\r\n\r\nFor help, type \"help\".\r\nType \"apropos word\" to search for commands related to \"word\"...\r\n"] +[2.390737, "o", "Reading symbols from \u001b[32m/linux/vmlinux\u001b[m...\r\n"] +[2.944725, "o", "Remote debugging using localhost:1234\r\n"] +[2.957375, "o", "\u001b[33mdefault_idle\u001b[m () at \u001b[32march/x86/kernel/process.c\u001b[m:689\r\n"] +[2.95746, "o", "689\t}\r\n"] +[2.957779, "o", "(gdb) "] +[5.428286, "o", "l"] +[5.524636, "o", "s"] +[5.780627, "o", "-"] +[6.367098, "o", "p"] +[6.509506, "o", "x"] +[7.278986, "o", "\r\n"] +[7.279172, "o", "Undefined command: \"ls-px\". 
Try \"help\".\r\n(gdb) "] +[8.676984, "o", "l"] +[8.850763, "o", "x"] +[9.039146, "o", "-"] +[9.318467, "o", "p"] +[9.500473, "o", "s"] +[9.750891, "o", "\r\n"] +[9.751114, "o", " TASK PID COMM\r\n"] +[9.75236, "o", "0xc17d02c0 0 swapper/0\r\n"] +[9.75369, "o", "0xc2530040 1 init\r\n"] +[9.7548, "o", "0xc2534080 2 kthreadd\r\n"] +[9.755807, "o", "0xc25360c0 3 rcu_gp\r\n"] +[9.756742, "o", "0xc2537100 4 rcu_par_gp\r\n"] +[9.757706, "o", "0xc2546180 6 kworker/0:0H\r\n"] +[9.758707, "o", "0xc25481c0 7 kworker/u2:0\r\n"] +[9.759604, "o", "0xc2549000 8 mm_percpu_wq\r\n"] +[9.76062, "o", "0xc254b040 9 ksoftirqd/0\r\n"] +[9.761626, "o", "0xc254c080 10 rcu_sched\r\n"] +[9.762529, "o", "0xc254e0c0 11 migration/0\r\n"] +[9.763454, "o", "0xc2572100 12 cpuhp/0\r\n"] +[9.764416, "o", "0xc2576140 13 kdevtmpfs\r\n"] +[9.765286, "o", "0xc2593180 14 netns\r\n"] +[9.766329, "o", "0xc26211c0 15 oom_reaper\r\n"] +[9.76739, "o", "0xc2623000 16 writeback\r\n"] +[9.768316, "o", "0xc25cd1c0 32 kblockd\r\n"] +[9.769188, "o", "0xc2638180 33 kworker/0:1\r\n"] +[9.770027, "o", "0xc2637140 34 kworker/0:1H\r\n"] +[9.770898, "o", "0xc2636100 35 kswapd0\r\n"] +[9.771804, "o", "0xc2634080 37 acpi_thermal_pm\r\n"] +[9.77276, "o", "0xc26350c0 38 kworker/u2:1\r\n"] +[9.773599, "o", "\u001b[m--Type for more, q to quit, c to continue without paging--"] +[16.511474, "o", "\r\n0xc2631040 39 kworker/0:2\r\n"] +[16.512921, "o", "0xc2630000 40 khvcd\r\n"] +[16.513907, "o", "0xc25d9180 41 ipv6_addrconf\r\n"] +[16.514686, "o", "0xc2627080 42 kmemleak\r\n"] +[16.515495, "o", "0xc26170c0 43 jbd2/vda-8\r\n"] +[16.516313, "o", "0xc2625040 44 ext4-rsv-conver\r\n"] +[16.517171, "o", "0xc842a1c0 187 udhcpc\r\n"] +[16.518006, "o", "0xcb365040 198 syslogd\r\n"] +[16.518734, "o", "0xc6730140 201 klogd\r\n"] +[16.519451, "o", "0xc260f100 207 getty\r\n"] +[16.520309, "o", "0xcb37a080 208 getty\r\n"] +[16.521243, "o", "0xc8431000 209 getty\r\n"] +[16.522122, "o", "0xc4b8a180 210 getty\r\n"] +[16.523042, "o", "0xcb375100 
211 getty\r\n"] +[16.524007, "o", "0xca6931c0 212 getty\r\n"] +[16.524667, "o", "(gdb) "] +[18.27663, "o", " "] +[19.237048, "o", "\b\u001b[K"] +[19.663197, "o", "#"] +[19.822869, "o", " "] +[21.012324, "o", "n"] +[21.079126, "o", "o"] +[21.3069, "o", "t"] +[21.565299, "o", "e"] +[21.791988, "o", " "] +[21.989943, "o", "t"] +[22.079654, "o", "h"] +[22.152775, "o", "a"] +[22.271404, "o", "t"] +[22.358257, "o", " "] +[22.495177, "o", "t"] +[22.616552, "o", "h"] +[22.689874, "o", "e"] +[22.759133, "o", "r"] +[22.851398, "o", "e"] +[22.944484, "o", " "] +[23.047542, "o", "a"] +[23.143223, "o", "r"] +[23.236144, "o", "e"] +[23.30143, "o", " "] +[23.479486, "o", "s"] +[23.726797, "o", "e"] +[23.996443, "o", "v"] +[24.285069, "o", "e"] +[24.769554, "o", "\b\u001b[K"] +[24.888103, "o", "\b\u001b[K"] +[25.372766, "o", "v"] +[25.486693, "o", "e"] +[25.780145, "o", "r"] +[25.831058, "o", "a"] +[25.957718, "o", "l"] +[26.079508, "o", " "] +[26.253337, "o", "t"] +[26.309159, "o", "a"] +[26.511159, "o", "s"] +[26.601906, "o", "k"] +[26.721828, "o", "s"] +[27.244749, "o", " "] +[28.574755, "o", "t"] +[28.854002, "o", "a"] +[29.471312, "o", "\b\u001b[K"] +[29.676745, "o", "h"] +[29.754496, "o", "a"] +[29.887311, "o", "t"] +[29.958997, "o", " "] +[30.166519, "o", "s"] +[30.315248, "o", "t"] +[30.393086, "o", "a"] +[30.975307, "o", "r"] +[31.599184, "o", "t"] +[31.752243, "o", " "] +[31.879063, "o", "w"] +[31.967152, "o", "i"] +[32.078599, "o", "t"] +[32.166707, "o", "h"] +[32.270994, "o", " "] +[32.471433, "o", "t"] +[32.542586, "o", "h"] +[32.686761, "o", "e"] +[32.763813, "o", " "] +[33.296049, "o", "L"] +[33.87867, "o", "\b\u001b[K"] +[35.524303, "o", "K"] +[36.343905, "o", "\b\u001b[K"] +[36.510508, "o", "k"] +[37.48386, "o", " "] +[37.8357, "o", "l"] +[37.922486, "o", "e"] +[38.118439, "o", "t"] +[38.276721, "o", "t"] +[38.376646, "o", "e"] +[38.503033, "o", "r"] +[38.911624, "o", "\r\n(gdb) "] +[39.276285, "o", "#"] +[39.519145, "o", " "] +[39.750673, "o", "t"] +[39.85472, 
"o", "h"] +[39.950603, "o", "e"] +[40.108249, "o", "s"] +[40.271864, "o", "e"] +[40.511774, "o", " "] +[40.801505, "o", "("] +[41.030531, "o", "b"] +[41.118878, "o", "u"] +[41.230505, "o", "t"] +[41.314769, "o", " "] +[41.740861, "o", "n"] +[41.824881, "o", "o"] +[42.019385, "o", "t"] +[42.14018, "o", " "] +[42.254553, "o", "o"] +[42.338673, "o", "n"] +[42.493789, "o", "l"] +[42.662862, "o", "y"] +[42.75931, "o", " "] +[42.916086, "o", "t"] +[43.020488, "o", "h"] +[43.084785, "o", "e"] +[43.263329, "o", "s"] +[43.362693, "o", "e"] +[43.608998, "o", ")"] +[43.774713, "o", " "] +[43.894708, "o", "a"] +[44.062643, "o", "r"] +[44.118689, "o", "e"] +[44.2443, "o", " "] +[44.431308, "o", "k"] +[44.550679, "o", "e"] +[44.612908, "o", "r"] +[44.708174, "o", "n"] +[44.790351, "o", "e"] +[44.894934, "o", "l"] +[44.974263, "o", " "] +[45.204614, "o", "t"] +[45.327253, "o", "h"] +[45.388938, "o", "r"] +[45.438216, "o", "e"] +[45.518535, "o", "a"] +[45.773446, "o", "d"] +[45.822653, "o", "s"] +[46.775316, "o", "\r\n(gdb) "] +[53.767289, "o", "#"] +[54.624466, "o", " "] +[54.85786, "o", "t"] +[54.962973, "o", "h"] +[55.108379, "o", "e"] +[55.926781, "o", " "] +[56.132174, "o", "p"] +[56.246669, "o", "s"] +[56.342818, "o", " "] +[56.523069, "o", "u"] +[56.729793, "o", "t"] +[56.791601, "o", "i"] +[56.998652, "o", "l"] +[57.102951, "o", "i"] +[57.433973, "o", "t"] +[57.510405, "o", "y"] +[57.663099, "o", " "] +[57.931719, "o", "m"] +[58.030756, "o", "a"] +[58.234054, "o", "r"] +[58.334973, "o", "k"] +[58.532386, "o", "s"] +[58.678812, "o", " "] +[58.88646, "o", "t"] +[58.982125, "o", "h"] +[59.094825, "o", "e"] +[59.236552, "o", "n"] +[59.630822, "o", " "] +[59.967128, "o", "w"] +[60.050881, "o", "i"] +[60.182217, "o", "t"] +[60.246628, "o", "h"] +[60.499501, "o", " "] +[60.996343, "o", "b"] +[61.062202, "o", "r"] +[61.15963, "o", "a"] +[61.606658, "o", "c"] +[61.710465, "o", "k"] +[61.790986, "o", "e"] +[61.926711, "o", "t"] +[62.094782, "o", "s"] +[62.243636, "o", " "] 
+[63.711846, "o", "["] +[63.806682, "o", "["] +[64.356046, "o", "\b\u001b[K"] +[64.620136, "o", "]"] +[65.375322, "o", "\r\n(gdb) "] +[68.557209, "o", "quit\r\n"] +[68.557963, "o", "A debugging session is active.\r\n\r\n\tInferior 1 [process 1] will be detached.\r\n\r\nQuit anyway? (y or n) "] +[69.452475, "o", "y"] +[69.607083, "o", "\r\nDetaching from program: /linux/vmlinux, process 1\r\n"] +[69.608099, "o", "Ending remote debugging.\r\n"] +[69.608145, "o", "[Inferior 1 (process 1) detached]\r\n"] +[69.613792, "o", "$ "] +[72.387606, "o", "m"] +[72.556239, "o", "i"] +[72.678491, "o", "n"] +[72.735372, "o", "i"] +[72.83213, "o", "c"] +[72.939861, "o", "o"] +[72.974432, "o", "m"] +[73.12681, "o", "d"] +[73.245949, "o", " "] +[73.451736, "o", "-"] +[73.787899, "o", "D"] +[73.926861, "o", " "] +[74.062585, "o", "s"] +[74.110529, "o", "e"] +[74.20401, "o", "r"] +[74.254354, "o", "i"] +[74.383866, "o", "a"] +[74.964635, "o", "l"] +[75.158299, "o", "."] +[75.358153, "o", "p"] +[75.483515, "o", "t"] +[75.690025, "o", "s"] +[75.791116, "o", "\r\n"] +[75.792551, "o", "sh: 2: minicomd: not found\r\n$ "] +[77.369862, "o", "^[[A"] +[78.147033, "o", "\b \b"] +[78.246726, "o", "\b \b"] +[78.374032, "o", "\b \b\b \b"] +[78.726508, "o", "m"] +[78.830234, "o", "i"] +[78.942715, "o", "n"] +[78.998286, "o", "i"] +[79.16445, "o", "c"] +[79.219047, "o", "o"] +[79.28939, "o", "m"] +[79.699735, "o", " "] +[79.851281, "o", "-"] +[80.22679, "o", "D"] +[80.359667, "o", " "] +[80.494074, "o", "s"] +[80.58978, "o", "e"] +[80.665833, "o", "r"] +[80.734429, "o", "i"] +[80.831438, "o", "a"] +[80.902218, "o", "l"] +[81.065769, "o", "."] +[81.235249, "o", "p"] +[81.354437, "o", "t"] +[81.667451, "o", "s"] +[82.015056, "o", "\r\n"] +[82.0318, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[0m\u001b(B\u001b[?1h\u001b=\u001b[H\u001b[2J"] +[82.032263, "o", "\u001b[?12l\u001b[?25h\nWelcome to minicom 2.7.1\r\n\nOPTIONS: I18n \r\nCompiled on Dec 23 2019, 02:06:26.\r\nPort serial.pts, 
11:09:49\r\n\nPress CTRL-A Z for help on special keys\r\n\n"] +[82.665267, "o", "\n"] +[82.670205, "o", "Poky (Yocto Project Reference Distro) 2.3 qemux86 /dev/hvc0"] +[82.670382, "o", "\r\n"] +[82.671379, "o", "\n"] +[82.673617, "o", "qemux86 login: "] +[83.310781, "o", "r"] +[83.35226, "o", "o"] +[83.486962, "o", "o"] +[83.559101, "o", "t"] +[83.640496, "o", "\r\n"] +[83.807489, "o", "root@qemux86:~# "] +[84.151513, "o", "p"] +[84.239303, "o", "s"] +[84.353291, "o", "\r\n"] +[84.365744, "o", " PID USER VSZ STAT COMMAND"] +[84.366019, "o", "\r\n"] +[84.382473, "o", " 1 root 2004 S init [5]"] +[84.382916, "o", "\r\n"] +[84.384492, "o", " 2 root 0 SW [kthreadd]"] +[84.384691, "o", "\r\n"] +[84.385851, "o", " 3 root 0 IW< [rcu_gp]"] +[84.386047, "o", "\r\n"] +[84.387605, "o", " 4 root 0 IW< [rcu_par_gp]"] +[84.387746, "o", "\r\n"] +[84.388848, "o", " 6 root 0 IW< [kworker/0:0H-ev]"] +[84.388994, "o", "\r\n"] +[84.38998, "o", " 7 root 0 IW [kworker/u2:0-ev]"] +[84.390164, "o", "\r\n"] +[84.391562, "o", " 8 root 0 IW< [mm_percpu_wq]"] +[84.391813, "o", "\r\n"] +[84.392754, "o", " 9 root 0 SW [ksoftirqd/0]"] +[84.392996, "o", "\r\n"] +[84.393827, "o", " 10 root 0 IW [rcu_sched]"] +[84.394025, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.395414, "o", " 11 root 0 SW [migration/0]"] +[84.395571, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.396462, "o", " 12 root 0 SW [cpuhp/0]"] +[84.39663, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.397513, "o", " 13 root 0 SW [kdevtmpfs]"] +[84.397659, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.399276, "o", " 14 root 0 IW< [netns]"] +[84.399499, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.40051, "o", " 15 root 0 SW [oom_reaper]"] +[84.400715, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.4016, "o", " 16 root 0 IW< [writeback]"] +[84.401763, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.403336, "o", " 32 root 0 IW< [kblockd]"] +[84.403515, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.404416, "o", " 33 root 0 IW [kworker/0:1-mm_]"] +[84.404597, 
"o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.405656, "o", " 34 root 0 IW< [kworker/0:1H-kb]"] +[84.405869, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.407531, "o", " 35 root 0 SW [kswapd0]"] +[84.40772, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.408696, "o", " 37 root 0 IW< [acpi_thermal_pm]"] +[84.408838, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.409754, "o", " 38 root 0 IW [kworker/u2:1-ev]"] +[84.409941, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.411415, "o", " 39 root 0 IW [kworker/0:2-eve]"] +[84.411603, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.412584, "o", " 40 root 0 SW [khvcd]"] +[84.412802, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.413892, "o", " 41 root 0 IW< [ipv6_addrconf]"] +[84.414073, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.415589, "o", " 42 root 0 SWN [kmemleak]"] +[84.415761, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.416609, "o", " 43 root 0 SW [jbd2/vda-8]"] +[84.416767, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.417633, "o", " 44 root 0 IW< [ext4-rsv-conver]"] +[84.417776, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.419678, "o", " 187 root 2828 S udhcpc -R -b -p /var/run/udhcpc.eth0.pid -i eth0"] +[84.419824, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.421049, "o", " 198 root 2828 S /sbin/syslogd -n -O /var/log/messages"] +[84.421222, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.422318, "o", " 201 root 2828 S /sbin/klogd -n"] +[84.422683, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.424349, "o", " 207 root 2828 S /sbin/getty 38400 tty1"] +[84.424461, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.425792, "o", " 208 root 2972 S -sh\r\n\u001b[23;80H \u001b[24;1H"] +[84.427611, "o", " 209 root 2828 S /sbin/getty 38400 tty2"] +[84.427768, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.428986, "o", " 210 root 2828 S /sbin/getty 38400 tty3"] +[84.429144, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.430325, "o", " 211 root 2828 S /sbin/getty 38400 tty4"] +[84.430573, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.432246, "o", " 212 root 2828 S 
/sbin/getty 38400 tty5"] +[84.43239, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.433849, "o", " 932 root 2976 R ps"] +[84.434032, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[84.437908, "o", "root@qemux86:~# "] +[99.144067, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[99.145467, "o", "root@qemux86:~# "] +[99.862108, "o", "\u001b[0m\u001b(B\u001b[7m\r\u001b[K\u001b[?12l\u001b[?25h\u001b[?25lCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7.1 | VT102 | Offline | al.pts\u001b[?12l\u001b[?25h\u001b[24;17H"] +[100.044152, "o", "\u001b[8;30H\u001b[?25l\u001b[0m\u001b(B\u001b(0lqqqqqqqqqqqqqqqqqqqqqqk\u001b[9;30Hx\u001b[0m\u001b(B Leave Minicom? \u001b[0m\u001b(B\u001b(0x\u001b[10;30Hx\u001b[0m\u001b(B No \u001b[0m\u001b(B\u001b(0x\u001b[11;30Hmqqqqqqqqqqqqqqqqqqqqqqj\u001b[10;51H\u001b[?25l\u001b[10;33H\u001b[0m\u001b(B\u001b[7m Yes "] +[100.583354, "o", "\u001b[?12l\u001b[?25h\u001b[8;1H\u001b[0m\u001b(B 40 root 0 SW [khvcd] \u001b[9;1H 41 root 0 IW< [ipv6_addrconf] \u001b[10;1H 42 root 0 SWN [kmemleak] \u001b[11;1H 43 root 0 SW [jbd2/vda-8] \u001b[24;17H\u001b[0m\u001b(B\u001b[7m\u001b[?12l\u001b[?25h\u001b[?12l\u001b[?25h"] +[100.583429, "o", "\u001b[0m\u001b(B\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1l\u001b>\u001b[!p\u001b[?3;4l\u001b[4l\u001b>"] +[100.58371, "o", "$ "] +[101.414149, "o", "^[[A"] +[101.566416, "o", "^[[A"] +[101.886558, "o", "^[[A"] +[102.335042, "o", "\b \b"] +[102.842375, "o", "\b \b"] +[102.872862, "o", "\b \b\b \b"] +[102.904871, "o", "\b \b"] +[102.937132, "o", "\b \b"] +[102.969648, "o", "\b \b\b \b"] +[103.01399, "o", "\b \b"] +[103.046078, "o", "\b \b"] +[103.079602, "o", "\b \b\b \b"] +[103.486251, "o", "m"] +[103.572083, "o", "a"] +[103.611934, "o", "m"] +[103.642732, "o", "k"] +[103.734628, "o", "e"] +[103.837599, "o", " "] +[104.06262, "o", "g"] +[104.35936, "o", "\b \b"] +[104.491737, "o", "\b \b"] +[104.620252, "o", "\b \b"] +[104.731786, "o", "\b \b"] +[104.904506, "o", "k"] +[105.139616, "o", "\b \b"] +[105.26003, "o", "\b \b"] 
+[105.374543, "o", "k"] +[105.47003, "o", "e"] +[105.550545, "o", " "] +[105.946754, "o", "g"] +[106.106541, "o", "d"] +[106.171182, "o", "b"] +[106.471309, "o", "\r\n"] +[106.477162, "o", "gdb -ex \"target remote localhost:1234\" /linux/vmlinux\r\n"] +[106.511747, "o", "\u001b[35;1m\u001b[35;1mGNU gdb \u001b[m\u001b[35;1m(Ubuntu 9.2-0ubuntu1~20.04) \u001b[m\u001b[35;1m9.2\u001b[m\u001b[35;1m\r\n\u001b[m\u001b[mCopyright (C) 2020 Free Software Foundation, Inc.\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\nType \"show copying\" and \"show warranty\" for details.\r\nThis GDB was configured as \"x86_64-linux-gnu\".\r\nType \"show configuration\" for configuration details.\r\nFor bug reporting instructions, please see:\r\n.\r\nFind the GDB manual and other documentation resources online at:\r\n .\r\n\r\nFor help, type \"help\".\r\nType \"apropos word\" to search for commands related to \"word\"...\r\n"] +[106.512051, "o", "Reading symbols from \u001b[32m/linux/vmlinux\u001b[m...\r\n"] +[107.070342, "o", "Remote debugging using localhost:1234\r\n"] +[107.082871, "o", "\u001b[33mdefault_idle\u001b[m () at \u001b[32march/x86/kernel/process.c\u001b[m:689\r\n"] +[107.082941, "o", "689\t}\r\n"] +[107.083245, "o", "(gdb) "] +[108.67027, "o", "l"] +[108.79013, "o", "s"] +[110.752881, "o", "-"] +[111.158637, "o", "\b\u001b[K"] +[111.294213, "o", "\b\u001b[K"] +[111.375362, "o", "x"] +[111.56393, "o", "-"] +[112.075891, "o", "p"] +[112.716755, "o", "s"] +[112.983024, "o", "\r\n"] +[112.983204, "o", " TASK PID COMM\r\n"] +[112.984424, "o", "0xc17d02c0 0 swapper/0\r\n"] +[112.985648, "o", "0xc2530040 1 init\r\n"] +[112.986756, "o", "0xc2534080 2 kthreadd\r\n"] +[112.987788, "o", "0xc25360c0 3 rcu_gp\r\n"] +[112.98883, "o", "0xc2537100 4 rcu_par_gp\r\n"] +[112.989979, "o", "0xc2546180 6 kworker/0:0H\r\n"] +[112.990945, "o", "0xc25481c0 7 
kworker/u2:0\r\n"] +[112.991808, "o", "0xc2549000 8 mm_percpu_wq\r\n"] +[112.992762, "o", "0xc254b040 9 ksoftirqd/0\r\n"] +[112.993594, "o", "0xc254c080 10 rcu_sched\r\n"] +[112.994443, "o", "0xc254e0c0 11 migration/0\r\n"] +[112.995327, "o", "0xc2572100 12 cpuhp/0\r\n"] +[112.996231, "o", "0xc2576140 13 kdevtmpfs\r\n"] +[112.997154, "o", "0xc2593180 14 netns\r\n"] +[112.997928, "o", "0xc26211c0 15 oom_reaper\r\n"] +[112.998731, "o", "0xc2623000 16 writeback\r\n"] +[112.999501, "o", "0xc25cd1c0 32 kblockd\r\n"] +[113.000229, "o", "0xc2638180 33 kworker/0:1\r\n"] +[113.000993, "o", "0xc2637140 34 kworker/0:1H\r\n"] +[113.001713, "o", "0xc2636100 35 kswapd0\r\n"] +[113.002441, "o", "0xc2634080 37 acpi_thermal_pm\r\n"] +[113.003341, "o", "0xc26350c0 38 kworker/u2:1\r\n"] +[113.004215, "o", "\u001b[m--Type for more, q to quit, c to continue without paging--"] +[114.273671, "o", "q"] +[116.3102, "o", "\r\n"] +[116.31037, "o", "Quit\r\n(gdb) "] +[117.96624, "o", "#"] +[118.254117, "o", " "] +[118.40776, "o", "l"] +[118.491374, "o", "e"] +[118.654629, "o", "t"] +[118.828047, "o", "s"] +[118.945063, "o", " "] +[119.111903, "o", "i"] +[119.185942, "o", "n"] +[119.254491, "o", "s"] +[119.411189, "o", "p"] +[119.478562, "o", "e"] +[119.566175, "o", "c"] +[120.054814, "o", "t"] +[121.318467, "o", " "] +[122.796389, "o", "t"] +[122.876021, "o", "h"] +[123.006447, "o", "e"] +[123.121491, "o", " "] +[123.486383, "o", "k"] +[123.662142, "o", "s"] +[123.761024, "o", "o"] +[123.827853, "o", "f"] +[124.056733, "o", "t"] +[124.884417, "o", "i"] +[124.998466, "o", "r"] +[125.090886, "o", "q"] +[125.458436, "o", "d"] +[126.11713, "o", " "] +[126.34268, "o", "k"] +[126.423265, "o", "e"] +[126.470594, "o", "r"] +[126.581815, "o", "n"] +[126.662065, "o", "e"] +[126.766791, "o", "l"] +[126.867196, "o", " "] +[127.013848, "o", "t"] +[127.11873, "o", "h"] +[127.196182, "o", "r"] +[127.252684, "o", "e"] +[127.302566, "o", "a"] +[127.525289, "o", "d"] +[127.612912, "o", "s"] +[128.91899, "o", 
"\b\u001b[K"] +[129.079044, "o", "\r\n"] +[129.079153, "o", "(gdb) "] +[134.822513, "o", "p"] +[134.98218, "o", "r"] +[135.04637, "o", "i"] +[135.095337, "o", "n"] +[135.213449, "o", "t"] +[135.367892, "o", " "] +[135.948456, "o", "("] +[137.420016, "o", "("] +[137.64176, "o", "s"] +[137.791566, "o", "t"] +[137.847118, "o", "r"] +[137.958465, "o", "u"] +[138.060018, "o", "c"] +[138.267908, "o", "t"] +[138.871323, "o", " "] +[139.042958, "o", "t"] +[139.103241, "o", "a"] +[139.242309, "o", "s"] +[139.308529, "o", "k"] +[139.510379, "o", "_"] +[139.682368, "o", "s"] +[139.887398, "o", "u"] +[140.423883, "o", "\b\u001b[K"] +[140.518478, "o", "t"] +[140.671075, "o", "r"] +[140.742392, "o", "u"] +[140.875932, "o", "c"] +[141.086603, "o", "t"] +[141.483711, "o", "*"] +[141.670866, "o", ")"] +[143.13397, "o", "0xc254b040"] +[144.158432, "o", ")"] +[144.861881, "o", "-"] +[145.332921, "o", ">"] +[145.765653, "o", "\u0007"] +[146.052545, "o", "\r\nDisplay all 156 possibilities? (y or n)"] +[148.83855, "o", "\r\n(gdb) print ((struct task_struct*)0xc254b040)->"] +[151.566382, "o", "m"] +[151.708377, "o", "m"] +[152.208982, "o", " "] +[152.590906, "o", "\r\n"] +[152.606852, "o", "$1 = (struct mm_struct *) \u001b[34m0x0\u001b[m\r\n(gdb) "] +[153.7479, "o", "#"] +[153.990068, "o", " "] +[154.220116, "o", "n"] +[154.293473, "o", "o"] +[154.476203, "o", "t"] +[154.549343, "o", "i"] +[154.718407, "o", "c"] +[154.792436, "o", "e"] +[155.435468, "o", " "] +[155.623648, "o", "t"] +[155.71801, "o", "h"] +[155.820625, "o", "a"] +[155.910595, "o", "t"] +[156.03814, "o", " "] +[156.275706, "o", "t"] +[156.374535, "o", "h"] +[156.467813, "o", "e"] +[156.534298, "o", "r"] +[156.606573, "o", "e"] +[156.708536, "o", " "] +[156.820267, "o", "i"] +[156.926727, "o", "s"] +[157.036148, "o", " "] +[157.260091, "o", "n"] +[157.332754, "o", "o"] +[157.521488, "o", " "] +[157.621793, "o", "a"] +[157.950278, "o", "d"] +[158.123876, "o", "d"] +[158.332801, "o", "r"] +[158.382158, "o", "e"] 
+[158.571143, "o", "s"] +[158.688164, "o", "s"] +[158.818238, "o", " "] +[158.910157, "o", "s"] +[159.013842, "o", "p"] +[159.102999, "o", "a"] +[159.299519, "o", "c"] +[159.363577, "o", "e"] +[159.494688, "o", " "] +[159.600096, "o", "a"] +[159.85968, "o", "s"] +[160.022336, "o", "s"] +[160.13205, "o", "o"] +[160.270871, "o", "c"] +[160.335137, "o", "i"] +[160.382535, "o", "a"] +[160.580297, "o", "t"] +[160.636041, "o", "e"] +[160.792826, "o", "d"] +[160.878038, "o", " "] +[160.990215, "o", "w"] +[161.078373, "o", "i"] +[161.214096, "o", "t"] +[161.286032, "o", "h"] +[161.382406, "o", " "] +[161.466374, "o", "t"] +[161.573907, "o", "h"] +[161.654564, "o", "e"] +[161.770776, "o", " "] +[161.918309, "o", "k"] +[162.030649, "o", "e"] +[162.070539, "o", "r"] +[162.155973, "o", "n"] +[162.237838, "o", "e"] +[162.295025, "o", "l"] +[162.42764, "o", " "] +[163.094332, "o", "\b\u001b[K"] +[163.222493, "o", "\b\u001b[K"] +[164.206568, "o", "\b\u001b[K"] +[164.339329, "o", "\b\u001b[K"] +[164.468159, "o", "\b\u001b[K"] +[164.590204, "o", "\b\u001b[K"] +[164.724112, "o", "\b\u001b[K"] +[164.892234, "o", "t"] +[165.055837, "o", "\b\u001b[K"] +[165.198298, "o", "\b\u001b[K"] +[165.335496, "o", "\b\u001b[K"] +[165.468275, "o", "\b\u001b[K"] +[165.62238, "o", "\b\u001b[K"] +[166.246756, "o", "t"] +[166.326615, "o", "h"] +[166.382333, "o", "i"] +[166.4519, "o", "s"] +[166.541987, "o", " "] +[166.708252, "o", "t"] +[166.819081, "o", "a"] +[166.908185, "o", "s"] +[167.053111, "o", "k"] +[167.36667, "o", "\r\n(gdb) "] +[170.791095, "o", "# notice that there is no address space associated with this task"] +[171.750239, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[21Pprint ((struct task_struct*)0xc254b040)->mm "] +[172.23017, "o", "\b\u001b[K"] +[172.386797, "o", "\b\u001b[K"] +[172.531384, "o", "\b\u001b[K"] +[172.873833, "o", "f"] +[173.203858, "o", "i"] +[173.365186, "o", "les "] +[174.539751, "o", "\b\u001b[K"] +[174.715458, "o", "-"] +[175.180139, "o", ">"] 
+[175.871471, "o", "\u0007"] +[176.032089, "o", "\r\nclose_on_exec_init fdtab open_fds_init\r\ncount file_lock resize_in_progress\r\nfd_array full_fds_bits_init resize_wait\r\nfdt next_fd \r\n(gdb) print ((struct task_struct*)0xc254b040)->files->"] +[177.45997, "o", "f"] +[177.754961, "o", "d"] +[178.438573, "o", "_"] +[179.441568, "o", "a"] +[179.67653, "o", "r"] +[179.854296, "o", "r"] +[179.966081, "o", "ay "] +[180.934307, "o", "\r\n"] +[180.954674, "o", "$2 = {\u001b[34m0x0\u001b[m \u001b[2m\u001b[m}\r\n(gdb) "] +[182.787841, "o", "#"] +[183.110265, "o", " "] +[183.479449, "o", "a"] +[183.614116, "o", "l"] +[183.783304, "o", "s"] +[183.866187, "o", "o"] +[184.006426, "o", " "] +[184.275627, "o", "n"] +[184.333689, "o", "o"] +[184.82741, "o", "t"] +[184.926539, "o", "i"] +[185.106651, "o", "c"] +[185.165936, "o", "e"] +[185.308254, "o", " "] +[185.430403, "o", "t"] +[185.517961, "o", "h"] +[185.584954, "o", "a"] +[185.670047, "o", "t"] +[186.034584, "o", " "] +[186.222612, "o", "t"] +[186.315392, "o", "h"] +[186.405867, "o", "e"] +[186.477735, "o", "r"] +[186.570951, "o", "e"] +[186.621919, "o", " "] +[186.749962, "o", "a"] +[186.814544, "o", "r"] +[186.915797, "o", "e"] +[186.971487, "o", " "] +[187.165461, "o", "n"] +[187.246091, "o", "o"] +[187.346347, "o", " "] +[187.559526, "o", "o"] +[187.668583, "o", "p"] +[187.718056, "o", "e"] +[188.950305, "o", "n"] +[189.132854, "o", "e"] +[189.278635, "o", "d"] +[189.470868, "o", " "] +[189.824985, "o", "f"] +[189.950471, "o", "i"] +[190.036377, "o", "l"] +[190.103353, "o", "e"] +[190.302439, "o", "s"] +[193.022303, "o", "\r\n(gdb) "] +[206.790997, "o", "b"] +[206.883683, "o", "t"] +[207.622064, "o", "\r\n"] +[207.622477, "o", "#0 \u001b[33mdefault_idle\u001b[m () at \u001b[32march/x86/kernel/process.c\u001b[m:689\r\n#1 \u001b[34m0xc102c18d\u001b[m in \u001b[33march_cpu_idle\u001b[m () at \u001b[32march/x86/kernel/process.c\u001b[m:680\r\n"] +[207.628692, "o", "#2 \u001b[34m0xc15de082\u001b[m in 
\u001b[33mdefault_idle_call\u001b[m () at \u001b[32mkernel/sched/idle.c\u001b[m:112\r\n"] +[207.628733, "o", "#3 \u001b[34m0xc108d875\u001b[m in \u001b[33mcpuidle_idle_call\u001b[m () at \u001b[32mkernel/sched/idle.c\u001b[m:194\r\n"] +[207.62875, "o", "#4 \u001b[33mdo_idle\u001b[m () at \u001b[32mkernel/sched/idle.c\u001b[m:299\r\n"] +[207.635135, "o", "#5 \u001b[34m0xc108dbd5\u001b[m in \u001b[33mcpu_startup_entry\u001b[m (\u001b[36mstate=state@entry\u001b[m=CPUHP_ONLINE)\u001b[m\r\n \u001b[m at \u001b[32mkernel/sched/idle.c\u001b[m:395\r\n#6 \u001b[34m0xc15d6100\u001b[m in \u001b[33mrest_init\u001b[m () at \u001b[32minit/main.c\u001b[m:721\r\n#7 \u001b[34m0xc18c77de\u001b[m in \u001b[33march_call_rest_init\u001b[m () at \u001b[32minit/main.c\u001b[m:845\r\n"] +[207.635417, "o", "#8 \u001b[34m0xc18c7c30\u001b[m in \u001b[33mstart_kernel\u001b[m () at \u001b[32minit/main.c\u001b[m:1061\r\n"] +[207.637653, "o", "#9 \u001b[34m0xc18c7218\u001b[m in \u001b[33mi386_start_kernel\u001b[m () at \u001b[32march/x86/kernel/head32.c\u001b[m:56\r\n"] +[207.638067, "o", "#10 \u001b[34m0xc10001db\u001b[m in \u001b[33mstartup_32_smp\u001b[m () at \u001b[32march/x86/kernel/head_32.S\u001b[m:327\r\n"] +[207.638979, "o", "#11 \u001b[34m0x00000000\u001b[m in \u001b[33m??\u001b[m ()\r\n"] +[207.639304, "o", "(gdb) "] +[221.379671, "o", "#"] +[221.574477, "o", " "] +[221.763647, "o", "t"] +[221.859656, "o", "h"] +[221.910208, "o", "i"] +[222.230146, "o", "s"] +[223.003102, "o", " "] +[223.24279, "o", "d"] +[223.342376, "o", "o"] +[223.407244, "o", "e"] +[223.515893, "o", "s"] +[223.822602, "o", " "] +[224.052215, "o", "n"] +[224.126804, "o", "o"] +[224.262025, "o", "t"] +[224.350204, "o", " "] +[224.566282, "o", "l"] +[224.715391, "o", "o"] +[224.851651, "o", "o"] +[224.892089, "o", "k"] +[225.071297, "o", " "] +[225.204093, "o", "a"] +[225.777377, "o", "\b\u001b[K"] +[225.938217, "o", "l"] +[226.117327, "o", "i"] +[226.267363, "o", "k"] +[226.381869, "o", "e"] +[226.453866, "o", " "] 
+[226.610517, "o", "a"] +[226.678686, "o", " "] +[227.092186, "o", "k"] +[227.652524, "o", "\b\u001b[K"] +[227.802894, "o", "\b\u001b[K"] +[228.462601, "o", " "] +[228.691067, "o", "b"] +[228.726474, "o", "a"] +[228.861766, "o", "c"] +[228.973894, "o", "k"] +[229.137284, "o", "t"] +[229.283711, "o", "r"] +[229.350451, "o", "a"] +[229.511061, "o", "c"] +[229.59807, "o", "e"] +[229.749979, "o", " "] +[230.186502, "o", "f"] +[230.257607, "o", "o"] +[230.38212, "o", "r"] +[230.453689, "o", " "] +[230.550129, "o", "a"] +[230.679919, "o", " "] +[234.654082, "o", "e"] +[234.740562, "o", "r"] +[235.011631, "o", "\b\u001b[K"] +[235.118657, "o", "\b\u001b[K"] +[235.298981, "o", "k"] +[235.390755, "o", "e"] +[235.437752, "o", "r"] +[235.526007, "o", "n"] +[235.623409, "o", "e"] +[235.763733, "o", "l"] +[235.773498, "o", " "] +[236.126636, "o", "t"] +[236.241013, "o", "h"] +[236.33206, "o", "r"] +[236.397333, "o", "e"] +[236.446064, "o", "a"] +[236.607301, "o", "d"] +[240.062311, "o", "\r\n"] +[240.062479, "o", "(gdb) "] +[240.711316, "o", "#"] +[240.90636, "o", " "] +[241.166191, "o", "l"] +[241.419451, "o", "e"] +[241.659109, "o", "t"] +[241.86961, "o", "s"] +[242.043257, "o", " "] +[244.843651, "o", "p"] +[244.918102, "o", "p"] +[245.016449, "o", "u"] +[245.158034, "o", "t"] +[245.244774, "o", " "] +[245.393348, "o", "a"] +[245.41361, "o", " "] +[245.76255, "o", "\b\u001b[K"] +[245.881592, "o", "\b\u001b[K"] +[246.006241, "o", "\b\u001b[K"] +[246.137238, "o", "\b\u001b[K"] +[246.272147, "o", "\b\u001b[K"] +[246.395526, "o", "\b\u001b[K"] +[246.574088, "o", "i"] +[246.677415, "o", "t"] +[247.062963, "o", "\b\u001b[K"] +[247.181883, "o", "\b\u001b[K"] +[247.334239, "o", "u"] +[247.446138, "o", "t"] +[247.546923, "o", " "] +[247.67005, "o", "a"] +[247.773321, "o", " "] +[248.004601, "o", "b"] +[248.078817, "o", "r"] +[248.151286, "o", "e"] +[248.214162, "o", "a"] +[248.299871, "o", "k"] +[248.541708, "o", "p"] +[248.628067, "o", "o"] +[248.811761, "o", "i"] +[248.876511, "o", 
"n"] +[248.971272, "o", "t"] +[249.125851, "o", " "] +[249.392141, "o", "i"] +[249.555648, "o", "n"] +[250.946185, "o", "\b\u001b[K"] +[251.077786, "o", "\b\u001b[K"] +[251.710112, "o", "t"] +[251.812957, "o", "o"] +[252.861908, "o", " "] +[253.182436, "o", "\b\u001b[K"] +[253.293631, "o", "\b\u001b[K"] +[253.398191, "o", "\b\u001b[K"] +[253.766615, "o", "n"] +[254.131587, "o", "\b\u001b[K"] +[254.315781, "o", "i"] +[254.376792, "o", "n"] +[254.433173, "o", " "] +[254.573627, "o", "t"] +[254.631208, "o", "h"] +[254.750211, "o", "e"] +[254.82721, "o", " "] +[255.077806, "o", "c"] +[255.181875, "o", "o"] +[255.237745, "o", "n"] +[255.36349, "o", "t"] +[255.413945, "o", "e"] +[255.601549, "o", "x"] +[255.798133, "o", "t"] +[255.870077, "o", " "] +[256.177252, "o", "s"] +[256.356189, "o", "w"] +[256.420864, "o", "i"] +[256.530677, "o", "t"] +[256.731915, "o", "c"] +[256.77443, "o", "h"] +[256.883801, "o", " "] +[258.571714, "o", "r"] +[258.640714, "o", "o"] +[258.712151, "o", "u"] +[258.829888, "o", "t"] +[258.924193, "o", "i"] +[258.975067, "o", "n"] +[259.069473, "o", "e"] +[259.19488, "o", " "] +[259.366391, "o", "a"] +[259.47005, "o", "n"] +[259.557575, "o", "d"] +[259.654709, "o", " "] +[260.19587, "o", "w"] +[260.29212, "o", "a"] +[260.381822, "o", "i"] +[260.533426, "o", "t"] +[260.606091, "o", " "] +[260.739803, "o", "f"] +[260.798442, "o", "o"] +[261.681096, "o", "r"] +[262.777308, "o", " "] +[262.951854, "o", "a"] +[263.061053, "o", " "] +[263.308236, "o", "k"] +[263.390265, "o", "e"] +[264.173829, "o", "r"] +[264.245576, "o", "n"] +[264.382178, "o", "e \r"] +[264.493978, "o", "l"] +[264.674766, "o", " "] +[265.043389, "o", "t"] +[265.139075, "o", "h"] +[265.206648, "o", "r"] +[265.278568, "o", "e"] +[265.317655, "o", "a"] +[265.498561, "o", "d"] +[265.589679, "o", " "] +[265.76644, "o", "t"] +[265.821599, "o", "o"] +[265.886969, "o", " "] +[266.331796, "o", "b"] +[266.403363, "o", "e"] +[266.508099, "o", " "] +[266.655882, "o", "s"] +[266.917726, "o", "c"] 
+[267.287361, "o", "h"] +[267.381853, "o", "e"] +[267.523815, "o", "d"] +[267.574051, "o", "u"] +[267.763154, "o", "l"] +[267.820226, "o", "e"] +[267.957836, "o", "d"] +[268.446383, "o", "\r\n(gdb) "] +[269.741828, "o", "b"] +[269.808224, "o", "r"] +[269.870671, "o", "e"] +[269.893739, "o", "a"] +[269.998105, "o", "k"] +[270.085623, "o", " "] +[270.571466, "o", "_"] +[270.699335, "o", "_"] +[271.977106, "o", "c"] +[272.129589, "o", "o"] +[272.205444, "o", "n"] +[272.312765, "o", "t"] +[272.366081, "o", "e"] +[272.470791, "o", "\u0007"] +[273.62, "o", "\b\u001b[K"] +[273.74179, "o", "\b\u001b[K"] +[273.885881, "o", "\b\u001b[K"] +[274.018615, "o", "\b\u001b[K"] +[274.143134, "o", "\b\u001b[K"] +[274.263468, "o", "s"] +[274.422194, "o", "i"] +[274.501319, "o", "w"] +[274.719655, "o", "\u0007"] +[275.053997, "o", "\b\u001b[K"] +[275.187562, "o", "\b\u001b[K"] +[275.286581, "o", "w"] +[275.384725, "o", "i"] +[275.506637, "o", "\u0007tch_to"] +[276.392894, "o", "_"] +[276.621962, "o", "a"] +[276.699043, "o", "s"] +[276.905128, "o", "m "] +[277.262339, "o", "\r\n"] +[277.291402, "o", "Breakpoint 1 at \u001b[34m0xc10018e8\u001b[m: file \u001b[32march/x86/entry/entry_32.S\u001b[m, line 765.\r\n(gdb) "] +[280.133879, "o", "c"] +[281.222045, "o", "\b\u001b[K"] +[281.395261, "o", "\u0007"] +[281.857003, "o", "c"] +[282.237722, "o", "\b\u001b[K"] +[282.79563, "o", "l"] +[282.94158, "o", "i"] +[283.034854, "o", "s"] +[283.499366, "o", "\b\u001b[K"] +[283.624926, "o", "\b\u001b[K"] +[283.746958, "o", "\b\u001b[K"] +[284.082629, "o", "c"] +[284.174072, "o", "\r\nContinuing.\r\n"] +[284.176343, "o", "\r\n"] +[284.176419, "o", "Breakpoint 1, \u001b[33m__switch_to_asm\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:765\r\n"] +[284.176491, "o", "765\t\tpushl\t%ebp\r\n(gdb) "] +[284.835414, "o", "l"] +[284.989635, "o", "i"] +[285.046355, "o", "s"] +[285.255024, "o", "t"] +[285.341593, "o", " "] +[287.197351, "o", "7"] +[287.491808, "o", "5"] +[287.755423, "o", "\b\u001b[K"] 
+[287.867358, "o", "6"] +[287.926098, "o", "0"] +[288.134096, "o", "\r\n"] +[288.135789, "o", "755\t/*\r\n756\t * %eax: prev task\r\n757\t * %edx: next task\r\n758\t */\r\n759\t.pushsection .text, \"ax\"\r\n760\tSYM_CODE_START(__switch_to_asm)\r\n761\t\t/*\r\n762\t\t * Save callee-saved registers\r\n763\t\t * This must match the order in struct inactive_task_frame\r\n764\t\t */\r\n"] +[288.135987, "o", "(gdb) "] +[288.699747, "o", "b"] +[288.811578, "o", "r"] +[288.862172, "o", "e"] +[288.942284, "o", "a"] +[289.033603, "o", "k"] +[289.355453, "o", " "] +[290.255841, "o", "\b\u001b[K"] +[290.384911, "o", "\b\u001b[K"] +[290.523826, "o", "\b\u001b[K"] +[290.661962, "o", "\b\u001b[K"] +[290.807839, "o", "\b\u001b[K"] +[290.941606, "o", "\b\u001b[K"] +[291.36621, "o", "\u0007"] +[292.275081, "o", "p"] +[292.451403, "o", "r"] +[292.517991, "o", "i"] +[292.602359, "o", "n"] +[292.667764, "o", "t"] +[292.766382, "o", " "] +[293.12916, "o", "("] +[293.699471, "o", "("] +[293.957439, "o", "t"] +[294.147806, "o", "a"] +[294.293733, "o", "s"] +[294.373217, "o", "k"] +[294.587807, "o", "_"] +[294.715863, "o", "s"] +[294.887282, "o", "t"] +[294.933917, "o", "r"] +[294.997435, "o", "u"] +[295.132333, "o", "c"] +[295.506086, "o", "\b\u001b[K"] +[296.004542, "o", "\b\u001b[K"] +[296.035262, "o", "\b\u001b[K"] +[296.084305, "o", "\b\u001b[K"] +[296.11605, "o", "\b\u001b[K"] +[296.147494, "o", "\b\u001b[K"] +[296.178799, "o", "\b\u001b[K"] +[296.317612, "o", "\b\u001b[K"] +[296.463433, "o", "\b\u001b[K"] +[296.718813, "o", "\b\u001b[K"] +[296.814308, "o", "s"] +[296.96261, "o", "t"] +[297.013641, "o", "r"] +[297.061739, "o", "u"] +[297.219398, "o", "c"] +[297.398522, "o", "t"] +[297.437315, "o", " "] +[297.542878, "o", "t"] +[297.637975, "o", "a"] +[297.747511, "o", "s"] +[297.819434, "o", "k"] +[298.016074, "o", "_"] +[298.117426, "o", "s"] +[298.27901, "o", "t"] +[298.342356, "o", "r"] +[298.422953, "o", "u"] +[298.524196, "o", "c"] +[298.698718, "o", "t"] +[298.989581, "o", "*"] 
+[298.997725, "o", "("] +[299.132205, "o", ")"] +[299.518034, "o", "\b\u001b[K"] +[299.613309, "o", "\b\u001b[K"] +[299.770424, "o", ")"] +[299.910219, "o", ")"] +[301.062221, "o", "\b\u001b[K"] +[301.485963, "o", "$"] +[302.314939, "o", "e"] +[304.099819, "o", "a"] +[304.402235, "o", "x"] +[304.916033, "o", ")"] +[305.422261, "o", "-"] +[305.88182, "o", ">"] +[306.029879, "o", "c"] +[306.133893, "o", "o"] +[306.256156, "o", "m"] +[306.398024, "o", "m"] +[306.597814, "o", "\r\n"] +[306.615485, "o", "$3 = \"swapper/0\\000\\000\\000\\000\\000\\000\"\r\n(gdb) "] +[310.517841, "o", "#"] +[310.726136, "o", " "] +[313.62966, "o", "\b\u001b[K"] +[313.774295, "o", "\b\u001b[K"] +[313.91453, "o", "\u0007"] +[315.397631, "o", "#"] +[315.577077, "o", " "] +[315.878097, "o", "c"] +[315.989862, "o", "u"] +[316.183243, "o", "r"] +[316.309684, "o", "r"] +[316.389297, "o", "e"] +[316.850982, "o", "n"] +[317.125964, "o", " "] +[317.295363, "o", "t"] +[317.403069, "o", "h"] +[317.478569, "o", "r"] +[317.551332, "o", "e"] +[317.594205, "o", "a"] +[317.751491, "o", "d"] +[317.949486, "o", "\b\u001b[K"] +[318.464381, "o", "\b\u001b[K"] +[318.487912, "o", "\b\u001b[K"] +[318.519446, "o", "\b\u001b[K"] +[318.55032, "o", "\b\u001b[K"] +[318.680362, "o", "\b\u001b[K"] +[318.817765, "o", "\b\u001b[K"] +[318.957945, "o", "t"] +[319.089737, "o", " "] +[319.291116, "o", "t"] +[319.389904, "o", "h"] +[319.647007, "o", "r"] +[319.74256, "o", "e"] +[319.78242, "o", "a"] +[319.942132, "o", "d"] +[320.030061, "o", " "] +[320.141699, "o", "i"] +[320.233772, "o", "s"] +[320.349512, "o", " "] +[320.443032, "o", "s"] +[320.80826, "o", "t"] +[320.90173, "o", "i"] +[321.069829, "o", "l"] +[321.204297, "o", "l"] +[321.259689, "o", " "] +[321.588152, "o", "s"] +[321.794884, "o", "w"] +[321.871903, "o", "a"] +[321.981533, "o", "p"] +[322.102066, "o", "p"] +[322.186971, "o", "e"] +[322.261993, "o", "r"] +[322.405556, "o", "\r\n"] +[322.405607, "o", "(gdb) "] +[322.726159, "o", "c"] +[322.901526, "o", 
"\r\nContinuing.\r\n"] +[322.912896, "o", "\r\n"] +[322.913085, "o", "Breakpoint 1, \u001b[33m__switch_to_asm\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:765\r\n765\t\tpushl\t%ebp\r\n(gdb) "] +[324.760917, "o", "c"] +[325.18249, "o", "\b# current thread is still swapper"] +[325.878372, "o", "\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[Cprint ((struct task_struct*)$eax)->comm"] +[327.053947, "o", "\r\n"] +[327.071409, "o", "$4 = \"rcu_sched\\000\\000\\000\\000\\000\\000\"\r\n(gdb) "] +[329.503754, "o", "#"] +[330.910795, "o", "\b\u001b[K"] +[334.523368, "o", "c"] +[335.314474, "o", "\b\u001b[K"] +[335.958257, "o", "b"] +[336.018706, "o", "t"] +[337.398802, "o", "\r\n"] +[337.414221, "o", "#0 \u001b[33m__switch_to_asm\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:765\r\n"] +[337.414915, "o", "#1 \u001b[34m0xc15d8277\u001b[m in \u001b[33mcontext_switch\u001b[m (\u001b[36mrf\u001b[m=0xc2561eb4, \u001b[36mnext\u001b[m=, \u001b[m\r\n"] +[337.414988, "o", " \u001b[m\u001b[36mprev\u001b[m=0xc254c080, \u001b[36mrq\u001b[m=0xcfdcb700) at \u001b[32mkernel/sched/core.c\u001b[m:3779\r\n"] +[337.415254, "o", "#2 \u001b[33m__schedule\u001b[m (\u001b[36mpreempt\u001b[m=, \u001b[36mpreempt@entry\u001b[m=false)\u001b[m\r\n \u001b[m at \u001b[32mkernel/sched/core.c\u001b[m:4528\r\n#3 \u001b[34m0xc15d86ce\u001b[m in \u001b[33mschedule\u001b[m () at \u001b[32mkernel/sched/core.c\u001b[m:4606\r\n"] +[337.442508, "o", "#4 \u001b[34m0xc15dd6a7\u001b[m in \u001b[33mschedule_timeout\u001b[m (\u001b[36mtimeout=timeout@entry\u001b[m=1)\u001b[m\r\n \u001b[m at \u001b[32mkernel/time/timer.c\u001b[m:1871\r\n#5 \u001b[34m0xc10dcfa0\u001b[m in \u001b[33mrcu_gp_fqs_loop\u001b[m () at \u001b[32mkernel/rcu/tree.c\u001b[m:1928\r\n"] +[337.44868, "o", "#6 \u001b[33mrcu_gp_kthread\u001b[m (\u001b[36munused=unused@entry\u001b[m=0x0) at \u001b[32mkernel/rcu/tree.c\u001b[m:2102\r\n"] +[337.449075, "o", "#7 \u001b[34m0xc107c753\u001b[m in \u001b[33mkthread\u001b[m 
(\u001b[36m_create\u001b[m=0xc2408ca0) at \u001b[32mkernel/kthread.c\u001b[m:292\r\n#8 \u001b[34m0xc1001960\u001b[m in \u001b[33mret_from_fork\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:850\r\n"] +[337.449715, "o", "#9 \u001b[34m0x00000000\u001b[m in \u001b[33m??\u001b[m ()\r\n"] +[337.450155, "o", "(gdb) "] +[340.203269, "o", "t"] +[340.851246, "o", "\b\u001b[K"] +[341.14753, "o", "#"] +[341.387109, "o", " "] +[341.451102, "o", " "] +[341.661207, "o", "t"] +[341.978747, "o", "\b\u001b[K"] +[342.107108, "o", "\b\u001b[K"] +[342.349889, "o", "t"] +[342.491659, "o", "h"] +[342.515285, "o", "i"] +[342.699961, "o", "s"] +[342.805615, "o", " "] +[343.002237, "o", "l"] +[343.18705, "o", "o"] +[343.303844, "o", "o"] +[343.390559, "o", "k"] +[343.554804, "o", "s"] +[343.653215, "o", " "] +[343.835557, "o", "l"] +[343.978652, "o", "i"] +[344.125443, "o", "k"] +[344.223034, "o", "e"] +[344.312753, "o", " "] +[344.440075, "o", "a"] +[344.512933, "o", " "] +[344.889199, "o", "k"] +[345.025482, "o", "e"] +[345.045973, "o", "r"] +[345.140636, "o", "n"] +[345.237924, "o", "e"] +[345.325469, "o", "l"] +[345.422476, "o", " "] +[345.621936, "o", "t"] +[345.741317, "o", "h"] +[345.826829, "o", "r"] +[345.901016, "o", "e"] +[345.937951, "o", "a"] +[346.083099, "o", "d"] +[348.901657, "o", "\r\n(gdb) "] +[349.394054, "o", "#"] +[349.654109, "o", " "] +[350.307681, "o", "n"] +[350.395153, "o", "o"] +[350.899543, "o", "t"] +[351.003489, "o", "i"] +[351.17943, "o", "c"] +[351.246088, "o", "e"] +[351.469435, "o", " "] +[352.502347, "o", "t"] +[352.66686, "o", "h"] +[352.760953, "o", "a"] +[352.894464, "o", "t"] +[353.085035, "o", " "] +[355.643772, "o", "a"] +[356.059323, "o", " "] +[356.227601, "o", "k"] +[356.343137, "o", "e"] +[356.405474, "o", "r"] +[356.477181, "o", "n"] +[356.57342, "o", "e"] +[356.659084, "o", "l"] +[356.761894, "o", " "] +[356.941633, "o", "t"] +[357.045635, "o", "h"] +[357.112728, "o", "r"] +[357.179646, "o", "e"] +[357.241188, "o", "a"] 
+[357.707008, "o", "d"] +[357.885383, "o", " "] +[358.117478, "o", "s"] +[358.283371, "o", "t"] +[358.343069, "o", "a"] +[358.452513, "o", "r"] +[359.795314, "o", "t"] +[360.037755, "o", "s"] +[360.458901, "o", " "] +[366.070028, "o", "f"] +[366.133469, "o", "r"] +[366.261029, "o", "o"] +[366.381485, "o", "m"] +[366.530389, "o", " "] +[366.597727, "o", "a"] +[366.725499, "o", " "] +[366.850841, "o", "s"] +[366.979612, "o", "p"] +[367.024724, "o", "e"] +[367.237378, "o", "c"] +[367.357012, "o", "i"] +[367.437107, "o", "a"] +[367.618604, "o", "l"] +[367.985673, "o", " "] +[368.269841, "o", "f"] +[368.365828, "o", "o"] +[368.554828, "o", "r"] +[368.650754, "o", "k"] +[371.995165, "o", " "] +[372.946533, "o", "\b\u001b[K"] +[375.702282, "o", "\r\n"] +[375.702404, "o", "(gdb) "] +[398.867113, "o", "#"] +[399.146887, "o", " "] +[399.331616, "o", "a"] +[399.597434, "o", "l"] +[399.730647, "o", "s"] +[399.805839, "o", "o"] +[399.909163, "o", " "] +[400.11739, "o", "n"] +[400.165148, "o", "o"] +[400.285605, "o", "t"] +[400.365312, "o", "i"] +[400.485439, "o", "c"] +[400.579979, "o", "e"] +[400.65356, "o", " "] +[400.806284, "o", "a"] +[401.194396, "o", "t"] +[401.781734, "o", "\b\u001b[K"] +[401.914341, "o", "\b\u001b[K"] +[402.030244, "o", "t"] +[402.145374, "o", "h"] +[402.245147, "o", "a"] +[402.309402, "o", "t"] +[402.382045, "o", " "] +[402.525574, "o", "a"] +[402.629039, "o", "l"] +[402.754007, "o", "l"] +[402.821383, "o", " "] +[402.990226, "o", "k"] +[403.06459, "o", "e"] +[403.117803, "o", "r"] +[403.205019, "o", "n"] +[403.318207, "o", "e"] +[403.341663, "o", "l"] +[403.429692, "o", " "] +[403.629956, "o", "t"] +[403.741843, "o", "h"] +[403.808117, "o", "r"] +[403.88467, "o", "e"] +[403.95492, "o", "a"] +[404.141474, "o", "d"] +[404.205018, "o", "s"] +[404.326094, "o", " "] +[404.539117, "o", "u"] +[404.589606, "o", "s"] +[404.733589, "o", "e"] +[404.869205, "o", " "] +[407.242269, "o", "t"] +[407.32576, "o", "h"] +[407.4612, "o", "e"] +[407.541613, "o", " "] 
+[407.762738, "o", "k"] +[409.130554, "o", "t"] +[409.621372, "o", "h"] +[409.695796, "o", "r"] +[409.741875, "o", "e"] +[409.789943, "o", "a"] +[409.939027, "o", "d"] +[410.021602, "o", " "] +[410.218404, "o", "f"] +[410.323032, "o", "u"] +[410.371459, "o", "n"] +[410.474746, "o", "c"] +[410.688493, "o", "t"] +[410.7416, "o", "i"] +[410.779033, "o", "o"] +[411.003284, "o", "n"] +[411.093928, "o", " "] +[412.117382, "o", "t"] +[412.189337, "o", "o"] +[412.341309, "o", " "] +[412.506227, "o", "n"] +[412.589441, "o", "i"] +[412.850878, "o", "t"] +[412.986206, "o", "i"] +[413.427046, "o", "\b\u001b[K"] +[413.554719, "o", "\b\u001b[K"] +[413.677038, "o", "\b\u001b[K"] +[413.835017, "o", "\b\u001b[K"] +[414.005774, "o", "i"] +[414.484917, "o", "n"] +[414.540995, "o", "i"] +[414.693336, "o", "t"] +[414.7614, "o", "i"] +[414.863364, "o", "a"] +[414.998825, "o", "l"] +[415.062427, "o", "i \r"] +[415.166791, "o", "z"] +[415.3332, "o", "e"] +[415.478198, "o", "\r\n(gdb) "] +[442.445477, "o", "quit\r\n"] +[442.445561, "o", "A debugging session is active.\r\n\r\n\tInferior 1 [process 1] will be detached.\r\n\r\nQuit anyway? 
(y or n) "] +[442.970982, "o", "y"] +[443.214441, "o", "\r\n"] +[443.214527, "o", "Detaching from program: /linux/vmlinux, process 1\r\n"] +[443.215393, "o", "Ending remote debugging.\r\n"] +[443.215436, "o", "[Inferior 1 (process 1) detached]\r\n"] +[443.220945, "o", "$ "] +[444.348773, "o", "\r\n"] diff --git a/Documentation/teaching/res/ksoftirqd-packet-flood.cast b/Documentation/teaching/res/ksoftirqd-packet-flood.cast new file mode 100644 index 00000000000000..6f2392386da4fe --- /dev/null +++ b/Documentation/teaching/res/ksoftirqd-packet-flood.cast @@ -0,0 +1,462 @@ +{"version": 2, "width": 80, "height": 24, "timestamp": 1616354478, "idle_time_limit": 1.0, "env": {"SHELL": null, "TERM": "xterm"}} +[0.002236, "o", "$ "] +[1.852599, "o", "m"] +[2.132922, "o", "i"] +[2.332344, "o", "n"] +[2.388604, "o", "i"] +[2.612998, "o", "c"] +[2.684655, "o", "o"] +[2.747904, "o", "m"] +[2.877702, "o", " "] +[3.052747, "o", "-"] +[3.317509, "o", "D"] +[3.468219, "o", " "] +[3.652408, "o", "s"] +[3.741658, "o", "e"] +[3.820052, "o", "r"] +[3.884557, "o", "i"] +[4.017426, "o", "a"] +[4.097078, "o", "l"] +[4.294502, "o", "."] +[4.483502, "o", "p"] +[4.604408, "o", "t"] +[4.836403, "o", "s"] +[5.173772, "o", "\r\n"] +[5.174525, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[0m\u001b(B"] +[5.174615, "o", "\u001b[?1h\u001b=\u001b[H\u001b[2J"] +[5.174986, "o", "\u001b[?12l\u001b[?25h\nWelcome to minicom 2.7.1\r\n\nOPTIONS: I18n \r\nCompiled on Dec 23 2019, 02:06:26.\r\nPort serial.pts, 19:21:04\r\n\nPress CTRL-A Z for help on special keys\r\n\n"] +[6.310469, "o", "\n"] +[6.311875, "o", "root@qemux86:~# "] +[7.981099, "o", "t"] +[8.029191, "o", "o"] +[8.086076, "o", "p"] +[8.245555, "o", "\r\n"] +[8.399818, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[KMem: 34424K 
used, 206256K free, 3836K shrd, 380K buff, 8008K cached"] +[8.399997, "o", "\r\n"] +[8.400309, "o", "CPU: 0% usr 8% sys 0% nic 91% idle 0% io 0% irq 0% sirq"] +[8.400449, "o", "\r\n"] +[8.40103, "o", "Load average: 0.10 0.30 0.18 1/37 5529"] +[8.401214, "o", "\r\n"] +[8.41045, "o", "\u001b[0m\u001b(B\u001b[7m PID PPID USER STAT VSZ %VSZ %CPU COMMAND"] +[8.41063, "o", "\r\n"] +[8.411801, "o", "\u001b[0m\u001b(B 5529 5474 root R 2828 1% 6% top"] +[8.411997, "o", "\r\n"] +[8.412704, "o", " 10 2 root IW 0 0% 3% [rcu_sched]"] +[8.412931, "o", "\r\n"] +[8.413466, "o", " 5474 1 root S 2972 1% 0% -sh"] +[8.41367, "o", "\r\n"] +[8.414183, "o", " 198 1 root S 2828 1% 0% /sbin/syslogd -n -O /var/log/messages"] +[8.414369, "o", "\r\n"] +[8.41567, "o", " 187 1 root S 2828 1% 0% udhcpc -R -b -p /var/run/udhcpc.eth0.p\u001b[9;80H"] +[8.415869, "o", "\r\n"] +[8.416317, "o", " 201 1 root S 2828 1% 0% /sbin/klogd -n"] +[8.416484, "o", "\r\n"] +[8.41698, "o", " 207 1 root S 2828 1% 0% /sbin/getty 38400 tty1"] +[8.417134, "o", "\r\n"] +[8.417626, "o", " 209 1 root S 2828 1% 0% /sbin/getty 38400 tty2"] +[8.417853, "o", "\r\n"] +[8.418241, "o", " 210 1 root S 2828 1% 0% /sbin/getty 38400 tty3"] +[8.418372, "o", "\r\n"] +[8.419445, "o", " 211 1 root S 2828 1% 0% /sbin/getty 38400 tty4"] +[8.419591, "o", "\r\n"] +[8.420058, "o", " 212 1 root S 2828 1% 0% /sbin/getty 38400 tty5"] +[8.420184, "o", "\r\n"] +[8.42079, "o", " 1 0 root S 2004 1% 0% init [5]"] +[8.420935, "o", "\r\n"] +[8.421371, "o", " 9 2 root SW 0 0% 0% [ksoftirqd/0]"] +[8.421516, "o", "\r\n"] +[8.421919, "o", " 42 2 root SWN 0 0% 0% [kmemleak]"] +[8.422058, "o", "\r\n"] +[8.422446, "o", " 39 2 root IW 0 0% 0% [kworker/0:2-mm_]"] +[8.422821, "o", "\r\n"] +[8.423488, "o", " 13 2 root SW 0 0% 0% [kdevtmpfs]"] +[8.423618, "o", "\r\n"] +[8.424021, "o", " 7 2 root IW 0 0% 0% [kworker/u2:0-ev]"] +[8.424538, "o", "\r\n"] +[8.424606, "o", " 38 2 root IW 0 0% 0% [kworker/u2:1-ev]"] +[8.4247, "o", "\r\n"] +[8.425134, "o", " 34 2 root 
IW< 0 0% 0% [kworker/0:1H-kb]"] +[8.425281, "o", "\r\n"] +[8.425506, "o", " 43 2 root SW 0 0% 0% [jbd2/vda-8]\r"] +[13.447981, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[KMem: 34424K used, 206256K free, 3836K shrd, 380K buff, 8008K cached"] +[13.448168, "o", "\r\n"] +[13.44854, "o", "CPU: 0% usr 0% sys 0% nic 99% idle 0% io 0% irq 0% sirq"] +[13.448659, "o", "\r\n"] +[13.449284, "o", "Load average: 0.09 0.30 0.18 1/37 5529"] +[13.449591, "o", "\r\n"] +[13.451544, "o", "\u001b[0m\u001b(B\u001b[7m PID PPID USER STAT VSZ %VSZ %CPU COMMAND"] +[13.451812, "o", "\r\n"] +[13.452307, "o", "\u001b[0m\u001b(B 5529 5474 root R 2972 1% 1% top"] +[13.452473, "o", "\r\n"] +[13.45301, "o", " 5474 1 root S 2972 1% 0% -sh"] +[13.453275, "o", "\r\n"] +[13.453876, "o", " 198 1 root S 2828 1% 0% /sbin/syslogd -n -O /var/log/messages"] +[13.454093, "o", "\r\n"] +[13.454812, "o", " 187 1 root S 2828 1% 0% udhcpc -R -b -p /var/run/udhcpc.eth0.p\u001b[8;80H"] +[13.454925, "o", "\r\n"] +[13.455729, "o", " 201 1 root S 2828 1% 0% /sbin/klogd -n"] +[13.455941, "o", "\r\n"] +[13.456477, "o", " 207 1 root S 2828 1% 0% /sbin/getty 38400 tty1"] +[13.456654, "o", "\r\n"] +[13.457185, "o", " 209 1 root S 2828 1% 0% /sbin/getty 38400 tty2"] +[13.457362, "o", "\r\n"] +[13.457758, "o", " 210 1 root S 2828 1% 0% /sbin/getty 38400 tty3"] +[13.457938, "o", "\r\n"] +[13.458413, "o", " 211 1 root S 2828 1% 0% /sbin/getty 38400 tty4"] +[13.458715, "o", "\r\n"] +[13.459436, "o", " 212 1 root S 2828 1% 0% /sbin/getty 38400 tty5"] +[13.459594, "o", "\r\n"] +[13.460002, "o", " 1 0 root S 2004 1% 0% init [5]"] +[13.46014, "o", "\r\n"] +[13.46068, "o", " 10 2 root IW 0 0% 0% [rcu_sched]"] +[13.460878, "o", "\r\n"] +[13.461278, "o", " 9 2 root SW 0 0% 0% [ksoftirqd/0]"] +[13.4614, "o", "\r\n"] 
+[13.461837, "o", " 42 2 root SWN 0 0% 0% [kmemleak]"] +[13.46196, "o", "\r\n"] +[13.462358, "o", " 39 2 root IW 0 0% 0% [kworker/0:2-eve]"] +[13.462478, "o", "\r\n"] +[13.463428, "o", " 13 2 root SW 0 0% 0% [kdevtmpfs]"] +[13.46356, "o", "\r\n"] +[13.463973, "o", " 7 2 root IW 0 0% 0% [kworker/u2:0-ev]"] +[13.464125, "o", "\r\n"] +[13.464593, "o", " 38 2 root IW 0 0% 0% [kworker/u2:1-ev]"] +[13.464709, "o", "\r\n"] +[13.465189, "o", " 34 2 root IW< 0 0% 0% [kworker/0:1H-kb]"] +[13.465323, "o", "\r\n"] +[13.465519, "o", " 43 2 root SW 0 0% 0% [jbd2/vda-8]\r"] +[18.487741, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[KMem: 34424K used, 206256K free, 3836K shrd, 380K buff, 8008K cached"] +[18.488024, "o", "\r\n"] +[18.488389, "o", "CPU: 0% usr 0% sys 0% nic 99% idle 0% io 0% irq 0% sirq"] +[18.488672, "o", "\r\n"] +[18.489375, "o", "Load average: 0.08 0.29 0.18 1/37 5529"] +[18.489628, "o", "\r\n"] +[18.491955, "o", "\u001b[0m\u001b(B\u001b[7m PID PPID USER STAT VSZ %VSZ %CPU COMMAND"] +[18.492108, "o", "\r\n"] +[18.492801, "o", "\u001b[0m\u001b(B 5529 5474 root R 2972 1% 1% top"] +[18.493112, "o", "\r\n"] +[18.493653, "o", " 5474 1 root S 2972 1% 0% -sh"] +[18.493772, "o", "\r\n"] +[18.494187, "o", " 198 1 root S 2828 1% 0% /sbin/syslogd -n -O /var/log/messages"] +[18.494394, "o", "\r\n"] +[18.495284, "o", " 187 1 root S 2828 1% 0% udhcpc -R -b -p /var/run/udhcpc.eth0.p\u001b[8;80H"] +[18.495442, "o", "\r\n"] +[18.495842, "o", " 201 1 root S 2828 1% 0% /sbin/klogd -n"] +[18.49601, "o", "\r\n"] +[18.496439, "o", " 207 1 root S 2828 1% 0% /sbin/getty 38400 tty1"] +[18.49661, "o", "\r\n"] +[18.497041, "o", " 209 1 root S 2828 1% 0% /sbin/getty 38400 tty2"] +[18.497175, "o", "\r\n"] +[18.497594, "o", " 210 1 root S 2828 1% 0% /sbin/getty 38400 tty3"] 
+[18.497754, "o", "\r\n"] +[18.498242, "o", " 211 1 root S 2828 1% 0% /sbin/getty 38400 tty4"] +[18.498393, "o", "\r\n"] +[18.499275, "o", " 212 1 root S 2828 1% 0% /sbin/getty 38400 tty5"] +[18.499412, "o", "\r\n"] +[18.499785, "o", " 1 0 root S 2004 1% 0% init [5]"] +[18.499938, "o", "\r\n"] +[18.500316, "o", " 10 2 root IW 0 0% 0% [rcu_sched]"] +[18.50045, "o", "\r\n"] +[18.500827, "o", " 9 2 root SW 0 0% 0% [ksoftirqd/0]"] +[18.500944, "o", "\r\n"] +[18.501445, "o", " 42 2 root SWN 0 0% 0% [kmemleak]"] +[18.50159, "o", "\r\n"] +[18.502138, "o", " 39 2 root IW 0 0% 0% [kworker/0:2-eve]"] +[18.502298, "o", "\r\n"] +[18.50317, "o", " 13 2 root SW 0 0% 0% [kdevtmpfs]"] +[18.503323, "o", "\r\n"] +[18.50372, "o", " 7 2 root IW 0 0% 0% [kworker/u2:0-ev]"] +[18.50386, "o", "\r\n"] +[18.504334, "o", " 38 2 root IW 0 0% 0% [kworker/u2:1-ev]"] +[18.504402, "o", "\r\n"] +[18.504755, "o", " 34 2 root IW< 0 0% 0% [kworker/0:1H-kb]"] +[18.504906, "o", "\r\n"] +[18.505094, "o", " 43 2 root SW 0 0% 0% [jbd2/vda-8]\r"] +[19.166233, "o", "\n\u001b[23;80H \u001b[24;1H"] +[19.169564, "o", "root@qemux86:~# "] +[23.887009, "o", "i"] +[24.003278, "o", "f"] +[25.38907, "o", "\b\u001b[K"] +[25.612968, "o", "p"] +[25.845698, "o", " "] +[26.036611, "o", "l"] +[26.229837, "o", "i"] +[26.685128, "o", "\b\u001b[K"] +[26.805031, "o", "\b\u001b[K"] +[26.89406, "o", "a"] +[26.974792, "o", "d"] +[27.125316, "o", "d"] +[27.30899, "o", "r"] +[27.389767, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.405865, "o", "1: lo: mtu 65536 qdisc noqueue qlen 1000"] +[27.406054, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.406659, "o", " link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00"] +[27.406961, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.408609, "o", " inet 127.0.0.1/8 scope host lo"] +[27.408902, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.409235, "o", " valid_lft forever preferred_lft forever"] +[27.409376, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.40968, "o", " inet6 ::1/128 scope host "] 
+[27.409834, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.410081, "o", " valid_lft forever preferred_lft forever"] +[27.41027, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.411406, "o", "2: eth0: mtu 1500 qdisc pfifo_fast qlen 1000"] +[27.41166, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.41196, "o", " link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff\r\n\u001b[23;80H \u001b[24;1H"] +[27.412383, "o", " inet 172.213.0.18/24 brd 172.213.0.255 scope global eth0\r\n\u001b[23;80H \u001b[24;1H"] +[27.412724, "o", " valid_lft forever preferred_lft forever"] +[27.412907, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.413445, "o", "3: sit0@NONE: mtu 1480 qdisc noop qlen 1000"] +[27.413562, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.413865, "o", " link/sit 0.0.0.0 brd 0.0.0.0"] +[27.414033, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[27.416761, "o", "root@qemux86:~# "] +[38.933061, "o", "\u001b[0m\u001b(B\u001b[7m\r\u001b[K\u001b[?12l\u001b[?25h\u001b[?25lCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7.1 | VT102 | Offline | al.pts\u001b[?12l\u001b[?25h"] +[38.933124, "o", "\u001b[24;17H"] +[40.700594, "o", "\u001b[8;30H\u001b[?25l\u001b[0m\u001b(B\u001b(0lqqqqqqqqqqqqqqqqqqqqqqk\u001b[9;30Hx\u001b[0m\u001b(B Leave Minicom? 
\u001b[0m\u001b(B\u001b(0x\u001b[10;30Hx\u001b[0m\u001b(B No \u001b[0m\u001b(B\u001b(0x\u001b[11;30Hmqqqqqqqqqqqqqqqqqqqqqqj\u001b[10;51H\u001b[?25l\u001b[10;33H\u001b[0m\u001b(B\u001b[7m Yes "] +[42.276438, "o", "\u001b[?12l\u001b[?25h\u001b[8;1H\u001b[0m\u001b(B 38 2 root IW 0 0% 0% [kworker/u2\u001b[9;1H 34 2 root IW< 0 0% 0% [kworker/0:\u001b[10;1H 43 2 root SW 0 0% 0% [jbd2/vda-8\u001b[11;1Hroot@qemux86:~# ip addr \u001b[24;17H\u001b[0m\u001b(B\u001b[7m\u001b[?12l\u001b[?25h"] +[42.276595, "o", "\u001b[?12l\u001b[?25h\u001b[0m\u001b(B\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1l\u001b>\u001b[!p\u001b[?3;4l\u001b[4l\u001b>"] +[42.276685, "o", "$ "] +[73.938043, "o", "n"] +[74.028267, "o", "o"] +[74.219846, "o", "h"] +[74.436234, "o", "u"] +[74.53207, "o", "p"] +[74.781125, "o", " "] +[75.188776, "o", "s"] +[75.276111, "o", "u"] +[75.916849, "o", "d"] +[75.992557, "o", "o"] +[76.120543, "o", " "] +[76.4366, "o", "p"] +[76.56442, "o", "i"] +[76.748589, "o", "n"] +[76.852263, "o", "g"] +[76.916772, "o", " "] +[77.073216, "o", "-"] +[77.184444, "o", "f"] +[77.284511, "o", " "] +[79.70078, "o", "172.213.0.18"] +[80.644466, "o", " "] +[81.313662, "o", "&"] +[81.95608, "o", "\r\n"] +[81.956239, "o", "$ "] +[81.956532, "o", "nohup: ignoring input and appending output to 'nohup.out'\r\n"] +[83.107734, "o", "\r\n$ "] +[84.435832, "o", "m"] +[84.532479, "o", "i"] +[84.643806, "o", "n"] +[84.820208, "o", "c"] +[84.891586, "o", "o"] +[84.971776, "o", "m"] +[92.307554, "o", " "] +[92.403882, "o", "-"] +[92.675839, "o", "D"] +[92.787506, "o", " "] +[93.563841, "o", "s"] +[93.652089, "o", "e"] +[93.715884, "o", "r"] +[93.763667, "o", "i"] +[93.875947, "o", "a"] +[93.93975, "o", "l"] +[94.116003, "o", "."] +[94.364381, "o", "p"] +[94.484616, "o", "t"] +[94.700067, "o", "s"] +[94.995562, "o", "\r\n"] +[94.996318, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[0m\u001b(B"] +[94.996448, "o", "\u001b[?1h\u001b=\u001b[H\u001b[2J"] +[94.996773, "o", 
"\u001b[?12l\u001b[?25h\nWelcome to minicom 2.7.1\r\n\nOPTIONS: I18n \r\nCompiled on Dec 23 2019, 02:06:26.\r\nPort serial.pts, 19:21:44\r\n\nPress CTRL-A Z for help on special keys\r\n\n"] +[95.525135, "o", "\n"] +[95.526362, "o", "root@qemux86:~# "] +[96.060597, "o", "t"] +[96.108363, "o", "o"] +[96.159286, "o", "p"] +[96.338736, "o", "\r\n"] +[96.58656, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[KMem: 34432K used, 206248K free, 3836K shrd, 384K buff, 8008K cached"] +[96.58739, "o", "\r\n"] +[96.590639, "o", "CPU: 0% usr 28% sys 0% nic 7% idle 0% io 0% irq 64% sirq"] +[96.591015, "o", "\r\n"] +[96.595524, "o", "Load average: 0.32 0.29 0.18 2/37 5531"] +[96.595722, "o", "\r\n"] +[96.608711, "o", "\u001b[0m\u001b(B\u001b[7m PID PPID USER STAT VSZ %VSZ %CPU COMMAND"] +[96.608982, "o", "\r\n"] +[96.609479, "o", "\u001b[0m\u001b(B 9 2 root RW 0 0% 50% [ksoftirqd/0]"] +[96.609654, "o", "\r\n"] +[96.610278, "o", " 5531 5474 root R 2828 1% 14% top"] +[96.610432, "o", "\r\n"] +[96.621162, "o", " 10 2 root IW 0 0% 14% [rcu_sched]"] +[96.62135, "o", "\r\n"] +[96.621869, "o", " 5474 1 root S 2972 1% 0% -sh"] +[96.621996, "o", "\r\n"] +[96.622532, "o", " 198 1 root S 2828 1% 0% /sbin/syslogd -n -O /var/log/messages"] +[96.622802, "o", "\r\n"] +[96.623253, "o", " 187 1 root S 2828 1% 0% udhcpc -R -b -p /var/run/udhcpc.eth0.p\u001b[10;80H"] +[96.623426, "o", "\r\n"] +[96.623854, "o", " 201 1 root S 2828 1% 0% /sbin/klogd -n"] +[96.62401, "o", "\r\n"] +[96.624429, "o", " 207 1 root S 2828 1% 0% /sbin/getty 38400 tty1"] +[96.624607, "o", "\r\n"] +[96.625129, "o", " 209 1 root S 2828 1% 0% /sbin/getty 38400 tty2"] +[96.625226, "o", "\r\n"] +[96.62562, "o", " 210 1 root S 2828 1% 0% /sbin/getty 38400 tty3"] +[96.625825, "o", "\r\n"] +[96.626246, "o", " 211 1 root 
S 2828 1% 0% /sbin/getty 38400 tty4"] +[96.626462, "o", "\r\n"] +[96.635421, "o", " 212 1 root S 2828 1% 0% /sbin/getty 38400 tty5"] +[96.635574, "o", "\r\n"] +[96.635889, "o", " 1 0 root S 2004 1% 0% init [5]"] +[96.636113, "o", "\r\n"] +[96.636631, "o", " 42 2 root SWN 0 0% 0% [kmemleak]"] +[96.636739, "o", "\r\n"] +[96.637233, "o", " 39 2 root IW 0 0% 0% [kworker/0:2-mm_]"] +[96.637352, "o", "\r\n"] +[96.637676, "o", " 7 2 root IW 0 0% 0% [kworker/u2:0-ev]"] +[96.637783, "o", "\r\n"] +[96.638119, "o", " 13 2 root SW 0 0% 0% [kdevtmpfs]"] +[96.638283, "o", "\r\n"] +[96.643324, "o", " 38 2 root IW 0 0% 0% [kworker/u2:1-ev]"] +[96.643513, "o", "\r\n"] +[96.644008, "o", " 43 2 root SW 0 0% 0% [jbd2/vda-8]"] +[96.644118, "o", "\r\n"] +[96.644439, "o", " 34 2 root IW< 0 0% 0% [kworker/0:1H-kb]\r"] +[101.701434, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[KMem: 34520K used, 206160K free, 3840K shrd, 388K buff, 8008K cached"] +[101.701499, "o", "\r\n"] +[101.701748, "o", "CPU: 0% usr 11% sys 0% nic 17% idle 0% io 0% irq 70% sirq"] +[101.701978, "o", "\r\n"] +[101.702444, "o", "Load average: 0.29 0.29 0.18 2/37 5541"] +[101.702673, "o", "\r\n"] +[101.715916, "o", "\u001b[0m\u001b(B\u001b[7m PID PPID USER STAT VSZ %VSZ %CPU COMMAND"] +[101.71616, "o", "\r\n"] +[101.716771, "o", "\u001b[0m\u001b(B 9 2 root SW 0 0% 43% [ksoftirqd/0]"] +[101.71689, "o", "\r\n"] +[101.717274, "o", " 10 2 root IW 0 0% 17% [rcu_sched]"] +[101.717579, "o", "\r\n"] +[101.717921, "o", " 5531 5474 root R 2972 1% 1% top"] +[101.718166, "o", "\r\n"] +[101.718688, "o", " 1 0 root S 2004 1% 1% init [5]"] +[101.718868, "o", "\r\n"] +[101.726882, "o", " 5474 1 root S 2972 1% 0% -sh"] +[101.727162, "o", "\r\n"] +[101.738059, "o", " 198 1 root S 2828 1% 0% /sbin/syslogd -n -O 
/var/log/messages"] +[101.738201, "o", "\r\n"] +[101.738825, "o", " 187 1 root S 2828 1% 0% udhcpc -R -b -p /var/run/udhcpc.eth0.p\u001b[11;80H"] +[101.739146, "o", "\r\n"] +[101.739547, "o", " 201 1 root S 2828 1% 0% /sbin/klogd -n"] +[101.739785, "o", "\r\n"] +[101.74017, "o", " 207 1 root S 2828 1% 0% /sbin/getty 38400 tty1"] +[101.740448, "o", "\r\n"] +[101.740861, "o", " 209 1 root S 2828 1% 0% /sbin/getty 38400 tty2"] +[101.745102, "o", "\r\n"] +[101.761351, "o", " 210 1 root S 2828 1% 0% /sbin/getty 38400 tty3"] +[101.761869, "o", "\r\n"] +[101.769943, "o", " 211 1 root S 2828 1% 0% /sbin/getty 38400 tty4"] +[101.770407, "o", "\r\n"] +[101.777249, "o", " 212 1 root S 2828 1% 0% /sbin/getty 38400 tty5"] +[101.777746, "o", "\r\n"] +[101.782585, "o", " 42 2 root SWN 0 0% 0% [kmemleak]"] +[101.78294, "o", "\r\n"] +[101.789941, "o", " 39 2 root IW 0 0% 0% [kworker/0:2-eve]"] +[101.790104, "o", "\r\n"] +[101.790657, "o", " 7 2 root IW 0 0% 0% [kworker/u2:0-fl]"] +[101.790924, "o", "\r\n"] +[101.791318, "o", " 13 2 root SW 0 0% 0% [kdevtmpfs]"] +[101.791471, "o", "\r\n"] +[101.791821, "o", " 38 2 root IW 0 0% 0% [kworker/u2:1-ev]"] +[101.792047, "o", "\r\n"] +[101.792515, "o", " 43 2 root SW 0 0% 0% [jbd2/vda-8]"] +[101.792624, "o", "\r\n"] +[101.792795, "o", " 34 2 root IW< 0 0% 0% [kworker/0:1H-kb]\r"] +[106.850435, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[KMem: 34512K used, 206168K free, 3840K shrd, 388K buff, 8012K cached"] +[106.850593, "o", "\r\n"] +[106.850752, "o", "CPU: 0% usr 7% sys 0% nic 17% idle 0% io 0% irq 74% sirq"] +[106.850895, "o", "\r\n"] +[106.851563, "o", "Load average: 0.35 0.30 0.19 2/37 5541"] +[106.851692, "o", "\r\n"] +[106.852789, "o", "\u001b[0m\u001b(B\u001b[7m PID PPID USER STAT VSZ %VSZ %CPU COMMAND"] 
+[106.852952, "o", "\r\n"] +[106.8535, "o", "\u001b[0m\u001b(B 9 2 root RW 0 0% 43% [ksoftirqd/0]"] +[106.853632, "o", "\r\n"] +[106.854315, "o", " 10 2 root IW 0 0% 17% [rcu_sched]"] +[106.854348, "o", "\r\n"] +[106.865859, "o", " 5531 5474 root R 2972 1% 1% top"] +[106.866331, "o", "\r\n"] +[106.870104, "o", " 5474 1 root S 2972 1% 0% -sh"] +[106.870466, "o", "\r\n"] +[106.879321, "o", " 198 1 root S 2828 1% 0% /sbin/syslogd -n -O /var/log/messages"] +[106.879486, "o", "\r\n"] +[106.880046, "o", " 187 1 root S 2828 1% 0% udhcpc -R -b -p /var/run/udhcpc.eth0.p\u001b[10;80H"] +[106.880262, "o", "\r\n"] +[106.880647, "o", " 201 1 root S 2828 1% 0% /sbin/klogd -n"] +[106.880836, "o", "\r\n"] +[106.881316, "o", " 207 1 root S 2828 1% 0% /sbin/getty 38400 tty1"] +[106.881433, "o", "\r\n"] +[106.881896, "o", " 209 1 root S 2828 1% 0% /sbin/getty 38400 tty2"] +[106.882036, "o", "\r\n"] +[106.882496, "o", " 210 1 root S 2828 1% 0% /sbin/getty 38400 tty3"] +[106.882685, "o", "\r\n"] +[106.892921, "o", " 211 1 root S 2828 1% 0% /sbin/getty 38400 tty4"] +[106.893509, "o", "\r\n"] +[106.90363, "o", " 212 1 root S 2828 1% 0% /sbin/getty 38400 tty5"] +[106.903807, "o", "\r\n"] +[106.904182, "o", " 1 0 root S 2004 1% 0% init [5]"] +[106.904433, "o", "\r\n"] +[106.904902, "o", " 42 2 root SWN 0 0% 0% [kmemleak]"] +[106.905429, "o", "\r\n"] +[106.905782, "o", " 39 2 root IW 0 0% 0% [kworker/0:2-eve]"] +[106.906003, "o", "\r\n"] +[106.906469, "o", " 7 2 root IW 0 0% 0% [kworker/u2:0-fl]"] +[106.906625, "o", "\r\n"] +[106.909123, "o", " 13 2 root SW 0 0% 0% [kdevtmpfs]"] +[106.909318, "o", "\r\n"] +[106.909667, "o", " 38 2 root IW 0 0% 0% [kworker/u2:1-ev]"] +[106.909813, "o", "\r\n"] +[106.91024, "o", " 43 2 root SW 0 0% 0% [jbd2/vda-8]"] +[106.910514, "o", "\r\n"] +[106.91072, "o", " 34 2 root IW< 0 0% 0% [kworker/0:1H-kb]\r"] +[111.956876, "o", 
"\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[KMem: 34512K used, 206168K free, 3848K shrd, 388K buff, 8020K cached\r\n"] +[111.9573, "o", "CPU: 0% usr 12% sys 0% nic 15% idle 0% io 0% irq 71% sirq"] +[111.957432, "o", "\r\n"] +[111.957933, "o", "Load average: 0.32 0.29 0.19 2/37 5551"] +[111.958189, "o", "\r\n"] +[111.97146, "o", "\u001b[0m\u001b(B\u001b[7m PID PPID USER STAT VSZ %VSZ %CPU COMMAND"] +[111.971723, "o", "\r\n"] +[111.972186, "o", "\u001b[0m\u001b(B 9 2 root RW 0 0% 44% [ksoftirqd/0]"] +[111.972254, "o", "\r\n"] +[111.972813, "o", " 10 2 root IW 0 0% 17% [rcu_sched]"] +[111.972939, "o", "\r\n"] +[111.973495, "o", " 1 0 root S 2004 1% 2% init [5]"] +[111.973663, "o", "\r\n"] +[111.974138, "o", " 5531 5474 root R 2972 1% 1% top"] +[111.974365, "o", "\r\n"] +[111.980464, "o", " 198 1 root S 2828 1% 0% /sbin/syslogd -n -O /var/log/messages"] +[111.980996, "o", "\r\n"] +[111.994119, "o", " 5474 1 root S 2972 1% 0% -sh"] +[111.994451, "o", "\r\n"] +[112.002489, "o", " 187 1 root S 2828 1% 0% udhcpc -R -b -p /var/run/udhcpc.eth0.p\u001b[11;80H"] +[112.003179, "o", "\r\n"] +[112.017546, "o", " 201 1 root S 2828 1% 0% /sbin/klogd -n"] +[112.018026, "o", "\r\n"] +[112.026893, "o", " 207 1 root S 2828 1% 0% /sbin/getty 38400 tty1"] +[112.027036, "o", "\r\n"] +[112.027601, "o", " 209 1 root S 2828 1% 0% /sbin/getty 38400 tty2"] +[112.027845, "o", "\r\n"] +[112.028265, "o", " 210 1 root S 2828 1% 0% /sbin/getty 38400 tty3"] +[112.028403, "o", "\r\n"] +[112.028856, "o", " 211 1 root S 2828 1% 0% /sbin/getty 38400 tty4"] +[112.028978, "o", "\r\n"] +[112.029317, "o", " 212 1 root S 2828 1% 0% /sbin/getty 38400 tty5"] +[112.029507, "o", "\r\n"] +[112.029848, "o", " 42 2 root SWN 0 0% 0% [kmemleak]"] +[112.030002, "o", "\r\n"] +[112.030344, "o", " 39 2 root 
IW 0 0% 0% [kworker/0:2-mm_]"] +[112.030576, "o", "\r\n"] +[112.040702, "o", " 7 2 root IW 0 0% 0% [kworker/u2:0-fl]\r\n"] +[112.041062, "o", " 13 2 root SW 0 0% 0% [kdevtmpfs]"] +[112.041124, "o", "\r\n"] +[112.041624, "o", " 38 2 root IW 0 0% 0% [kworker/u2:1-ev]"] +[112.041812, "o", "\r\n"] +[112.042352, "o", " 43 2 root SW 0 0% 0% [jbd2/vda-8]"] +[112.042485, "o", "\r\n"] +[112.042865, "o", " 34 2 root IW< 0 0% 0% [kworker/0:1H-kb]\r"] +[115.116387, "o", "\n\u001b[23;80H \u001b[24;1H"] diff --git a/Documentation/teaching/res/list_evolution.png b/Documentation/teaching/res/list_evolution.png new file mode 100644 index 00000000000000..aa44396dc6d60b Binary files /dev/null and b/Documentation/teaching/res/list_evolution.png differ diff --git a/Documentation/teaching/res/lro.png b/Documentation/teaching/res/lro.png new file mode 100755 index 00000000000000..4c781b96fe06ef Binary files /dev/null and b/Documentation/teaching/res/lro.png differ diff --git a/Documentation/teaching/res/minfs.png b/Documentation/teaching/res/minfs.png new file mode 100644 index 00000000000000..affd823050fb00 Binary files /dev/null and b/Documentation/teaching/res/minfs.png differ diff --git a/Documentation/teaching/res/minfs_arch.png b/Documentation/teaching/res/minfs_arch.png new file mode 100644 index 00000000000000..6778e6638fd60d Binary files /dev/null and b/Documentation/teaching/res/minfs_arch.png differ diff --git a/Documentation/teaching/res/net-dev-hw.png b/Documentation/teaching/res/net-dev-hw.png new file mode 100755 index 00000000000000..8d52e5fa12714d Binary files /dev/null and b/Documentation/teaching/res/net-dev-hw.png differ diff --git a/Documentation/teaching/res/page-fault-handling.png b/Documentation/teaching/res/page-fault-handling.png new file mode 100644 index 00000000000000..7f60933a48f0d0 Binary files /dev/null and b/Documentation/teaching/res/page-fault-handling.png differ diff --git a/Documentation/teaching/res/paging.png b/Documentation/teaching/res/paging.png 
new file mode 100644 index 00000000000000..53f7fb18c41352 Binary files /dev/null and b/Documentation/teaching/res/paging.png differ diff --git a/Documentation/teaching/res/read.png b/Documentation/teaching/res/read.png new file mode 100644 index 00000000000000..4502fb42271ab6 Binary files /dev/null and b/Documentation/teaching/res/read.png differ diff --git a/Documentation/teaching/res/read2.png b/Documentation/teaching/res/read2.png new file mode 100644 index 00000000000000..6f04b13ee8ae0d Binary files /dev/null and b/Documentation/teaching/res/read2.png differ diff --git a/Documentation/teaching/res/routing-cache.png b/Documentation/teaching/res/routing-cache.png new file mode 100755 index 00000000000000..47dcdcb2a5d59f Binary files /dev/null and b/Documentation/teaching/res/routing-cache.png differ diff --git a/Documentation/teaching/res/schematic.png b/Documentation/teaching/res/schematic.png new file mode 100644 index 00000000000000..89020fb9300465 Binary files /dev/null and b/Documentation/teaching/res/schematic.png differ diff --git a/Documentation/teaching/res/selectors-and-segments.cast b/Documentation/teaching/res/selectors-and-segments.cast new file mode 100644 index 00000000000000..2c04bbeb641743 --- /dev/null +++ b/Documentation/teaching/res/selectors-and-segments.cast @@ -0,0 +1,1818 @@ +{"version": 2, "width": 80, "height": 24, "timestamp": 1617704245, "idle_time_limit": 1.0, "env": {"SHELL": null, "TERM": "xterm"}} +[0.002359, "o", "$ "] +[1.205759, "o", "m"] +[1.286489, "o", "a"] +[1.349222, "o", "k"] +[1.526553, "o", "e"] +[1.643724, "o", " "] +[2.348906, "o", "g"] +[2.575307, "o", "d"] +[2.64632, "o", "b"] +[3.052572, "o", "\r\n"] +[3.074798, "o", "gdb -ex \"target remote localhost:1234\" /linux/vmlinux\r\n"] +[3.117199, "o", "\u001b[35;1m\u001b[35;1mGNU gdb \u001b[m\u001b[35;1m(Ubuntu 9.2-0ubuntu1~20.04) \u001b[m\u001b[35;1m9.2\u001b[m\u001b[35;1m\r\n\u001b[m\u001b[mCopyright (C) 2020 Free Software Foundation, Inc.\r\nLicense GPLv3+: GNU GPL 
version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\nType \"show copying\" and \"show warranty\" for details.\r\nThis GDB was configured as \"x86_64-linux-gnu\".\r\nType \"show configuration\" for configuration details.\r\nFor bug reporting instructions, please see:\r\n.\r\nFind the GDB manual and other documentation resources online at:\r\n .\r\n\r\nFor help, type \"help\".\r\nType \"apropos word\" to search for commands related to \"word\"...\r\n"] +[3.117763, "o", "Reading symbols from \u001b[32m/linux/vmlinux\u001b[m...\r\n"] +[3.768166, "o", "Remote debugging using localhost:1234\r\n"] +[3.781924, "o", "\u001b[34m0xc15dcb62\u001b[m in \u001b[33mdefault_idle\u001b[m () at \u001b[32m./arch/x86/include/asm/irqflags.h\u001b[m:60\r\n"] +[3.781966, "o", "60\t\tasm volatile(\"sti; hlt\": : :\"memory\");\r\n"] +[3.782406, "o", "(gdb) "] +[4.626339, "o", "b"] +[4.768564, "o", "t"] +[5.44911, "o", "\r\n"] +[5.449728, "o", "#0 \u001b[34m0xc15dcb62\u001b[m in \u001b[33mdefault_idle\u001b[m () at \u001b[32m./arch/x86/include/asm/irqflags.h\u001b[m:60\r\n"] +[5.449863, "o", "#1 \u001b[34m0xc102a0dd\u001b[m in \u001b[33march_cpu_idle\u001b[m () at \u001b[32march/x86/kernel/process.c\u001b[m:680\r\n"] +[5.459029, "o", "#2 \u001b[34m0xc15dcee2\u001b[m in \u001b[33mdefault_idle_call\u001b[m () at \u001b[32mkernel/sched/idle.c\u001b[m:112\r\n#3 \u001b[34m0xc1087fb5\u001b[m in \u001b[33mcpuidle_idle_call\u001b[m () at \u001b[32mkernel/sched/idle.c\u001b[m:194\r\n#4 \u001b[33mdo_idle\u001b[m () at \u001b[32mkernel/sched/idle.c\u001b[m:299\r\n"] +[5.466862, "o", "#5 \u001b[34m0xc1088295\u001b[m in \u001b[33mcpu_startup_entry\u001b[m (\u001b[36mstate=state@entry\u001b[m=CPUHP_ONLINE)\u001b[m\r\n \u001b[m at \u001b[32mkernel/sched/idle.c\u001b[m:395\r\n#6 \u001b[34m0xc15d4ffb\u001b[m in \u001b[33mrest_init\u001b[m () at \u001b[32minit/main.c\u001b[m:721\r\n"] +[5.467037, "o", "#7 
\u001b[34m0xc18cd7c4\u001b[m in \u001b[33march_call_rest_init\u001b[m () at \u001b[32minit/main.c\u001b[m:845\r\n#8 \u001b[34m0xc18cdc08\u001b[m in \u001b[33mstart_kernel\u001b[m () at \u001b[32minit/main.c\u001b[m:1061\r\n"] +[5.469609, "o", "#9 \u001b[34m0xc18cd218\u001b[m in \u001b[33mi386_start_kernel\u001b[m () at \u001b[32march/x86/kernel/head32.c\u001b[m:56\r\n"] +[5.470029, "o", "#10 \u001b[34m0xc10001db\u001b[m in \u001b[33mstartup_32_smp\u001b[m () at \u001b[32march/x86/kernel/head_32.S\u001b[m:327\r\n"] +[5.471491, "o", "#11 \u001b[34m0x00000000\u001b[m in \u001b[33m??\u001b[m ()\r\n"] +[5.472127, "o", "(gdb) "] +[6.957539, "o", "#"] +[7.184578, "o", " "] +[9.085494, "o", "i"] +[9.274558, "o", "t"] +[9.385268, "o", " "] +[9.560048, "o", "l"] +[9.72648, "o", "o"] +[9.838927, "o", "o"] +[9.918898, "o", "k"] +[10.059084, "o", "s"] +[10.113284, "o", " "] +[10.246361, "o", "l"] +[10.418976, "o", "i"] +[10.565605, "o", "k"] +[10.641569, "o", "e"] +[10.747208, "o", " "] +[10.9907, "o", "w"] +[11.09312, "o", "e"] +[11.231124, "o", " "] +[11.334085, "o", "a"] +[11.487572, "o", "r"] +[11.577493, "o", "e"] +[11.670981, "o", " "] +[12.627311, "o", "i"] +[12.911014, "o", "n"] +[13.064828, "o", " "] +[13.249479, "o", "k"] +[13.37127, "o", "e"] +[13.460074, "o", "r"] +[13.510226, "o", "n"] +[13.635205, "o", "e"] +[13.973161, "o", "l"] +[14.662931, "o", " "] +[15.131825, "o", "r"] +[15.214966, "o", "u"] +[15.358966, "o", "n"] +[15.487497, "o", "n"] +[15.654743, "o", "i"] +[15.831262, "o", "n"] +[15.996438, "o", "g"] +[16.146163, "o", " "] +[18.220116, "o", "w"] +[18.288027, "o", "i"] +[18.436822, "o", "t"] +[18.509192, "o", "h"] +[18.613143, "o", " "] +[18.807172, "o", "p"] +[18.897976, "o", "r"] +[19.013978, "o", "i"] +[19.204046, "o", "v"] +[19.283747, "o", "i"] +[19.47738, "o", "l"] +[19.544513, "o", "e"] +[19.78487, "o", "g"] +[19.880492, "o", "e"] +[20.602829, "o", " "] +[22.510059, "o", "0"] +[23.73426, "o", "\r\n"] +[23.734632, "o", "(gdb) "] +[24.495347, "o", 
"#"] +[24.632234, "o", " "] +[24.800588, "o", "l"] +[24.880492, "o", "e"] +[25.0523, "o", "t"] +[25.277878, "o", "s"] +[25.388396, "o", " "] +[25.613203, "o", "c"] +[25.720735, "o", "o"] +[25.817537, "o", "n"] +[25.855844, "o", "f"] +[25.973067, "o", "i"] +[26.212741, "o", "r"] +[26.386844, "o", "m"] +[26.496098, "o", " "] +[26.694327, "o", "t"] +[26.77636, "o", "h"] +[26.823023, "o", "i"] +[26.999112, "o", "s"] +[27.144801, "o", " "] +[27.434198, "o", "b"] +[27.494331, "o", "y"] +[27.714979, "o", " "] +[28.054888, "o", "l"] +[28.257672, "o", "o"] +[28.393068, "o", "o"] +[28.459965, "o", "k"] +[28.691808, "o", "i"] +[28.73292, "o", "n"] +[28.878271, "o", "g"] +[28.991934, "o", " "] +[29.143258, "o", "a"] +[29.367682, "o", "t"] +[29.494524, "o", " "] +[29.62679, "o", "t"] +[29.774843, "o", "h"] +[29.893124, "o", "e"] +[30.004863, "o", " "] +[38.956259, "o", "c"] +[39.046244, "o", "o"] +[39.159799, "o", "d"] +[39.244625, "o", "e"] +[39.30416, "o", " "] +[39.48092, "o", "s"] +[39.597493, "o", "e"] +[39.737666, "o", "l"] +[39.800726, "o", "e"] +[39.892586, "o", "c"] +[40.103179, "o", "t"] +[40.210366, "o", "o"] +[40.277594, "o", "r"] +[40.672707, "o", "\r\n"] +[40.673041, "o", "(gdb) "] +[41.394093, "o", "p"] +[41.550842, "o", "r"] +[41.704995, "o", "i"] +[41.773529, "o", "n"] +[41.869581, "o", "t"] +[42.223515, "o", " "] +[42.884229, "o", "/"] +[43.24162, "o", "$"] +[43.945015, "o", "\b\u001b[K"] +[44.294995, "o", "x"] +[44.490102, "o", " "] +[45.85674, "o", "$"] +[47.797713, "o", "c"] +[47.900134, "o", "s"] +[48.160261, "o", "\r\n"] +[48.160574, "o", "$1 = 0x60\r\n(gdb) "] +[52.875773, "o", "#"] +[53.057592, "o", " "] +[53.270518, "o", "n"] +[53.33, "o", "o"] +[53.420243, "o", "w"] +[53.516445, "o", " "] +[53.674458, "o", "l"] +[53.733163, "o", "e"] +[53.927126, "o", "t"] +[54.113039, "o", "s"] +[54.210531, "o", " "] +[54.350529, "o", "p"] +[54.509267, "o", "r"] +[54.597743, "o", "i"] +[54.668833, "o", "n"] +[54.807039, "o", "t"] +[54.905672, "o", " "] +[55.026232, 
"o", "t"] +[55.167086, "o", "h"] +[55.301416, "o", "e"] +[55.432852, "o", " "] +[58.048587, "o", "i"] +[58.185341, "o", "n"] +[58.275582, "o", "d"] +[58.355005, "o", "e"] +[58.575251, "o", "x"] +[59.228376, "o", " "] +[59.353553, "o", "f"] +[59.455246, "o", "o"] +[59.576325, "o", "r"] +[59.691869, "o", " "] +[59.775251, "o", "t"] +[59.864488, "o", "h"] +[59.931228, "o", "e"] +[60.017051, "o", " "] +[60.143587, "o", "c"] +[60.242612, "o", "o"] +[60.346778, "o", "d"] +[60.409921, "o", "e"] +[60.503901, "o", " "] +[60.653751, "o", "s"] +[60.791714, "o", "e"] +[60.927913, "o", "l"] +[60.995147, "o", "e"] +[61.100077, "o", "c"] +[61.301378, "o", "t"] +[61.409292, "o", "o"] +[61.472267, "o", "r"] +[61.999105, "o", "\r\n"] +[61.999211, "o", "(gdb) "] +[62.755519, "o", "p"] +[62.920307, "o", "r"] +[63.030171, "o", "i"] +[63.093306, "o", "n"] +[63.153238, "o", "t"] +[63.291438, "o", " "] +[63.834425, "o", "/"] +[64.026843, "o", "x"] +[64.200482, "o", " "] +[64.675073, "o", "\b\u001b[K"] +[64.821291, "o", "\b\u001b[K"] +[64.946712, "o", "\b\u001b[K"] +[66.373086, "o", "$"] +[66.734489, "o", "c"] +[66.85103, "o", "s"] +[67.689872, "o", ">"] +[67.812115, "o", ">"] +[91.768538, "o", "3"] +[92.507215, "o", "\r\n"] +[92.50757, "o", "$2 = 12\r\n(gdb) "] +[97.002473, "o", "#"] +[97.167356, "o", " "] +[97.43684, "o", "a"] +[97.554333, "o", "n"] +[98.078151, "o", "d"] +[98.230006, "o", " "] +[98.41845, "o", "n"] +[98.500863, "o", "o"] +[98.58555, "o", "w"] +[98.697065, "o", " "] +[98.902575, "o", "l"] +[99.036005, "o", "e"] +[99.242403, "o", "t"] +[99.457412, "o", "s"] +[99.570468, "o", " "] +[99.705253, "o", "p"] +[99.822772, "o", "r"] +[99.930937, "o", "i"] +[100.014833, "o", "n"] +[100.091449, "o", "t"] +[100.226161, "o", " "] +[100.331035, "o", "t"] +[100.564914, "o", "e"] +[100.990115, "o", "\b\u001b[K"] +[101.195011, "o", "h"] +[101.275921, "o", "e"] +[101.38122, "o", " "] +[103.755803, "o", "d"] +[103.824373, "o", "e"] +[103.946792, "o", "s"] +[104.052577, "o", "c"] 
+[104.247014, "o", "r"] +[104.904532, "o", "i"] +[105.058074, "o", "p"] +[105.226938, "o", "t"] +[105.297817, "o", "o"] +[105.408466, "o", "r"] +[105.513496, "o", " "] +[105.845144, "o", "f"] +[105.958437, "o", "o"] +[106.09132, "o", "r"] +[106.167673, "o", " "] +[106.430415, "o", "e"] +[106.855237, "o", "\b\u001b[K"] +[106.932061, "o", "s"] +[107.37581, "o", "e"] +[107.53021, "o", "l"] +[107.576069, "o", "e"] +[107.686763, "o", "c"] +[107.960174, "o", "t"] +[108.090163, "o", "o"] +[108.18428, "o", "r"] +[108.853073, "o", " "] +[109.180343, "o", "1"] +[109.290367, "o", "2"] +[109.8445, "o", "\r\n"] +[109.844823, "o", "(gdb) "] +[111.689231, "o", "#"] +[111.887764, "o", " "] +[112.2626, "o", "f"] +[112.390326, "o", "i"] +[112.490975, "o", "r"] +[112.741024, "o", "s"] +[112.893713, "o", "t"] +[112.982635, "o", " "] +[114.599914, "o", "g"] +[114.696218, "o", "e"] +[114.908597, "o", "t"] +[115.088006, "o", " "] +[115.172965, "o", "t"] +[115.352084, "o", "h"] +[115.459941, "o", "e"] +[115.604134, "o", " "] +[117.664675, "o", "G"] +[117.981042, "o", "T"] +[119.326974, "o", "D"] +[119.594969, "o", " "] +[121.590078, "o", "r"] +[121.687724, "o", "e"] +[121.886761, "o", "g"] +[121.987087, "o", "i"] +[122.037275, "o", "s"] +[122.222018, "o", "t"] +[122.330302, "o", "e"] +[122.429404, "o", "r"] +[123.050835, "o", " "] +[123.19778, "o", "v"] +[123.241349, "o", "a"] +[123.389395, "o", "l"] +[123.597067, "o", "u"] +[123.836279, "o", "e"] +[124.041227, "o", "\r\n"] +[124.041348, "o", "(gdb) "] +[126.327766, "o", "m"] +[126.44815, "o", "o"] +[126.814981, "o", "n"] +[126.886507, "o", "i"] +[127.037664, "o", "t"] +[127.148862, "o", "o"] +[127.266202, "o", "r"] +[127.368858, "o", " "] +[129.678077, "o", "s"] +[129.7363, "o", "h"] +[129.800484, "o", "o"] +[129.908385, "o", "w"] +[130.033271, "o", " "] +[131.662846, "o", "\b\u001b[K"] +[131.803395, "o", "\b\u001b[K"] +[131.940196, "o", "\b\u001b[K"] +[132.073194, "o", "\b\u001b[K"] +[132.18792, "o", "\b\u001b[K"] +[132.45787, "o", "i"] 
+[132.530376, "o", "n"] +[132.776408, "o", "g"] +[132.872135, "o", "o"] +[132.986392, "o", " "] +[133.379472, "o", "\b\u001b[K"] +[133.511446, "o", "\b\u001b[K"] +[133.626516, "o", "\b\u001b[K"] +[133.786323, "o", "f"] +[133.863781, "o", "o"] +[133.998018, "o", " "] +[134.086118, "o", "r"] +[134.560701, "o", "e"] +[134.886634, "o", "g"] +[134.991773, "o", "i"] +[135.080584, "o", "s"] +[135.2521, "o", "t"] +[135.343722, "o", "e"] +[135.452922, "o", "r"] +[135.647215, "o", "s"] +[135.73628, "o", "\r\n"] +[135.73688, "o", "EAX=0000"] +[135.737118, "o", "0000 EBX=00000000 ECX=ffffffff EDX=0"] +[135.737339, "o", "0000000\r\r\n"] +[135.737619, "o", "ESI=00000000 EDI=00000000 EBP=c17cff1"] +[135.737925, "o", "c ESP=c17cff18\r\r\nEIP=c15dcb62 EFL=0020024"] +[135.738173, "o", "6 [---Z-P-] CPL=0 II=0 A20=1 "] +[135.738429, "o", "SMM=0 HLT=1\r\r\nES =007b 00000000 ffffffff 00cff300 DPL=3 DS "] +[135.738589, "o", " [-WA]\r\r\nCS =006"] +[135.738828, "o", "0 00000000 ffffffff 00cf9a00 DPL=0 CS"] +[135.739037, "o", "32 [-R-]\r\r\nSS =0068 00000000"] +[135.739202, "o", " ffffffff 00cf9300 DPL=0 DS"] +[135.739242, "o", " [-WA]"] +[135.739526, "o", "\r\r\n"] +[135.739726, "o", "DS =007b 00000000 ffffffff 00cff300 DPL=3 DS ["] +[135.739962, "o", "-WA]\r\r\nFS =00d8 0e47b000 ffffffff 00"] +[135.740249, "o", "8f9300 DPL=0 DS16 [-WA]\r\r\nGS =00e0 cfdc"] +[135.740452, "o", "b200 00000018 00409100 DPL=0 DS [--"] +[135.740678, "o", "A]\r\r\nLDT=0000 0000000"] +[135.740839, "o", "0 00000000 00008200 DPL=0 LDT"] +[135.741014, "o", "\r\r\nTR =0080 ff806"] +[135.741163, "o", "000 0000407b 00008900 DPL"] +[135.741313, "o", "=0 TSS32-avl\r\r\n"] +[135.741459, "o", "GDT= ff801000 000000"] +[135.741543, "o", "ff\r\r\nIDT= ff800000 "] +[135.741584, "o", "000007ff\r\r\nCR0=80050"] +[135.741622, "o", "033 CR2=080919ab CR3=0"] +[135.741657, "o", "a450000 CR4=00000690\r"] +[135.741692, "o", "\r\nDR0=00000000 D"] +[135.741729, "o", "R1=00000000 DR2=0000"] +[135.741757, "o", "0000 DR3=0000000"] 
+[135.741795, "o", "0 \r\r\nDR6=ffff0ff0 D"] +[135.741835, "o", "R7=00000400\r\r\nEFER=000"] +[135.741869, "o", "0000000000000\r\r\n"] +[135.741904, "o", "FCW=037f FSW=0000 [S"] +[135.741935, "o", "T=0] FTW=00 MXCSR=0"] +[135.741964, "o", "0001f80\r\r\nFP"] +[135.742009, "o", "R0=0000000000000000 0000 FPR1"] +[135.742053, "o", "=0000000000000000 0000\r\r\n"] +[135.742095, "o", "FPR2=0000000000000000 0000 "] +[135.742136, "o", "FPR3=0000000000000000 000"] +[135.742173, "o", "0\r\r\nFPR4=0000000"] +[135.742212, "o", "000000000 0000 FPR5=000000"] +[135.742251, "o", "0000000000 0000\r\r\n"] +[135.742292, "o", "FPR6=0000000000000000 0000 FPR7=00000"] +[135.742328, "o", "00000000000 0000\r\r\n"] +[135.742365, "o", "XMM00=000000000000000"] +[135.7424, "o", "00000000000000000 XMM"] +[135.742436, "o", "01=0000000000000000000"] +[135.742472, "o", "0000000000000\r\r\n"] +[135.742508, "o", "XMM02=000000000000000"] +[135.742541, "o", "00000000000000000 XM"] +[135.742574, "o", "M03=00000000000000000"] +[135.742609, "o", "000000000000000\r\r\n"] +[135.742645, "o", "XMM04=000000000000"] +[135.74268, "o", "00000000000000000000 "] +[135.742716, "o", "XMM05=000000000000000000"] +[135.742753, "o", "00000000000000\r\r\n"] +[135.742792, "o", "XMM06=00000000000000000"] +[135.742829, "o", "000000000000000 XMM07=0"] +[135.74287, "o", "00000000000000000000000000"] +[135.742906, "o", "00000\r\r\n"] +[135.742943, "o", "(gdb) "] +[141.08448, "o", "g"] +[142.487912, "o", "\b\u001b[K"] +[142.687201, "o", "s"] +[142.782022, "o", "e"] +[142.929487, "o", "t"] +[143.097996, "o", " "] +[143.953732, "o", "$"] +[144.988721, "o", "g"] +[145.23854, "o", "d"] +[145.51718, "o", "t"] +[145.779607, "o", "r"] +[145.912464, "o", "="] +[154.09522, "o", "ff801000"] +[155.179972, "o", "\b"] +[155.680268, "o", "\b"] +[155.710228, "o", "\b"] +[155.740097, "o", "\b"] +[155.770866, "o", "\b"] +[155.801924, "o", "\b"] +[155.83298, "o", "\b"] +[155.863409, "o", "\b"] +[155.893282, "o", "\b"] +[155.923804, "o", "\b"] 
+[155.954806, "o", "\b"] +[156.173502, "o", "\u001b[C"] +[156.329491, "o", "\u001b[C"] +[156.44832, "o", "\u001b[C"] +[157.136526, "o", "0ff801000\b\b\b\b\b\b\b\b"] +[157.277804, "o", "xff801000\b\b\b\b\b\b\b\b"] +[157.772194, "o", "\r\n"] +[157.793131, "o", "(gdb) "] +[165.346319, "o", "#"] +[165.531054, "o", " "] +[165.974981, "o", "d"] +[166.056174, "o", "e"] +[166.210313, "o", "s"] +[166.277392, "o", "c"] +[166.519134, "o", "r"] +[166.620771, "o", "i"] +[166.731554, "o", "p"] +[166.883229, "o", "t"] +[166.955686, "o", "o"] +[167.072479, "o", "r"] +[167.204083, "o", " "] +[167.85161, "o", "\b\u001b[K"] +[167.982418, "o", "s"] +[168.100856, "o", " "] +[168.564966, "o", "h"] +[168.64102, "o", "a"] +[168.823243, "o", "v"] +[168.894548, "o", "e"] +[168.970183, "o", " "] +[169.176948, "o", "8"] +[169.334163, "o", " "] +[169.573922, "o", "b"] +[169.624225, "o", "y"] +[169.858766, "o", "t"] +[169.930975, "o", "e"] +[170.144177, "o", "s"] +[172.414362, "o", "\r\n"] +[172.414409, "o", "(gdb) "] +[173.311252, "o", "p"] +[173.662611, "o", "r"] +[173.785984, "o", "i"] +[173.865018, "o", "n"] +[173.983028, "o", "t"] +[174.148239, "o", " "] +[175.179933, "o", "/"] +[175.318199, "o", "x"] +[176.328504, "o", " "] +[178.585994, "o", "("] +[179.152768, "o", "u"] +[179.670353, "o", "i"] +[179.715934, "o", "n"] +[180.433924, "o", "t"] +[180.712095, "o", "6"] +[180.79663, "o", "4"] +[181.065001, "o", "_"] +[181.287562, "o", "t"] +[181.668811, "o", ")"] +[183.515012, "o", "$"] +[185.687333, "o", "\b\u001b[K"] +[185.866732, "o", "\b\u001b[K"] +[186.602865, "o", "*"] +[186.86583, "o", ")"] +[187.515633, "o", "$"] +[189.118783, "o", "d"] +[190.202703, "o", "\b\u001b[K"] +[190.355202, "o", "g"] +[190.666691, "o", "d"] +[190.945529, "o", "t"] +[191.772629, "o", "r"] +[192.777949, "o", "\b"] +[193.278082, "o", "\b"] +[193.30829, "o", "\b"] +[193.338654, "o", "\b"] +[193.369524, "o", "\b"] +[193.399722, "o", "\b"] +[193.430236, "o", "\b"] +[193.460701, "o", "\b"] +[193.491262, "o", "\b"] 
+[193.521133, "o", "\b"] +[193.551404, "o", "\b"] +[193.581346, "o", "\b"] +[193.612343, "o", "\b"] +[193.642766, "o", "\b"] +[193.673954, "o", "\b"] +[193.704329, "o", "\b"] +[193.735075, "o", "\b"] +[193.766105, "o", "\b"] +[193.968996, "o", "\u001b[C"] +[194.133982, "o", "\u001b[C"] +[195.116582, "o", "\u001b[C(uint64_t*)$gdtr\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[195.840287, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[196.582034, "o", ")"] +[197.108677, "o", "["] +[197.615, "o", "1"] +[197.748952, "o", "2"] +[197.846455, "o", "]"] +[199.192246, "o", "\r\n"] +[199.200699, "o", "$3 = 0xcf9a000000ffff\r\n"] +[199.20087, "o", "(gdb) "] +[200.899642, "o", "print /x ((uint64_t*)$gdtr)[12]"] +[201.157767, "o", "\b"] +[201.65802, "o", "\b"] +[201.688325, "o", "\b"] +[201.718252, "o", "\b"] +[201.74874, "o", "\b"] +[201.779962, "o", "\b"] +[201.811199, "o", "\b"] +[201.842097, "o", "\b"] +[201.87248, "o", "\b"] +[201.903057, "o", "\b"] +[201.933056, "o", "\b"] +[201.963408, "o", "\b"] +[201.993637, "o", "\b"] +[202.024063, "o", "\b"] +[202.054562, "o", "\b"] +[202.08602, "o", "\b"] +[202.116448, "o", "\b"] +[202.145957, "o", "\b"] +[202.177003, "o", "\b"] +[202.207484, "o", "\b"] +[202.237255, "o", "\b"] +[202.267668, "o", "\b"] +[202.29822, "o", "\b"] +[202.328265, "o", "\b"] +[202.603106, "o", "\u001b[C"] +[202.908862, "o", "\b\u001b[1P ((uint64_t*)$gdtr)[12]\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[203.068444, "o", "z ((uint64_t*)$gdtr)[12]\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[203.245412, "o", "\r\n"] +[203.245996, "o", "$4 = 0x00cf9a000000ffff\r\n"] +[203.246341, "o", "(gdb) "] +[213.714469, "o", "#"] +[213.924177, "o", " "] +[216.1305, "o", "d"] +[216.188454, "o", "e"] +[216.363086, "o", "c"] +[216.450319, "o", "o"] 
+[216.564475, "o", "d"] +[216.634156, "o", "e"] +[216.77561, "o", " "] +[216.897562, "o", "t"] +[217.009052, "o", "h"] +[217.107284, "o", "e"] +[217.195724, "o", " "] +[217.436172, "o", "l"] +[217.641538, "o", "i"] +[217.77799, "o", "m"] +[217.84922, "o", "i"] +[217.98972, "o", "t"] +[220.023935, "o", "\r\n"] +[220.024319, "o", "(gdb) "] +[220.290728, "o", "p"] +[220.450692, "o", "r"] +[220.533847, "o", "i"] +[220.588309, "o", "n"] +[220.673191, "o", "t"] +[220.969452, "o", " "] +[221.640322, "o", "/"] +[221.835579, "o", "x"] +[222.015648, "o", " "] +[229.910833, "o", "0"] +[230.018988, "o", "x"] +[235.196883, "o", "ffff"] +[236.417946, "o", " "] +[236.798758, "o", "*"] +[237.041762, "o", " "] +[239.365773, "o", "4"] +[239.475724, "o", "0"] +[239.686894, "o", "9"] +[240.006818, "o", "6"] +[241.88331, "o", "\r\n"] +[241.883649, "o", "$5 = 0xffff000\r\n(gdb) "] +[247.790867, "o", "#"] +[247.888405, "o", " "] +[248.078816, "o", "l"] +[248.234787, "o", "i"] +[248.378544, "o", "m"] +[248.444722, "o", "i"] +[248.608612, "o", "t"] +[248.721834, "o", " "] +[248.969296, "o", "i"] +[249.108096, "o", "s"] +[249.213166, "o", " "] +[249.852357, "o", "s"] +[249.970989, "o", "e"] +[250.175166, "o", "t"] +[250.316772, "o", " "] +[250.519205, "o", "t"] +[250.639319, "o", "o"] +[250.753856, "o", " "] +[251.306554, "o", "4"] +[251.870532, "o", "G"] +[252.09369, "o", "B"] +[253.582197, "o", "\r\n"] +[253.582499, "o", "(gdb) "] +[360.143066, "o", "s"] +[360.281378, "o", "e"] +[360.470522, "o", "t"] +[360.613987, "o", " "] +[361.843991, "o", "$"] +[362.851023, "o", "k"] +[362.985051, "o", "e"] +[363.042871, "o", "r"] +[363.130172, "o", "n"] +[363.188481, "o", "e"] +[363.324746, "o", "l"] +[363.842416, "o", "_"] +[365.495183, "o", "c"] +[365.631849, "o", "o"] +[365.731124, "o", "d"] +[365.781546, "o", "e"] +[371.021879, "o", "="] +[371.101039, "o", " "] +[371.423014, "o", "\b\u001b[K"] +[371.56274, "o", "\b\u001b[K"] +[371.676663, "o", " "] +[371.793814, "o", "="] +[371.916711, "o", " "] 
+[378.293839, "o", "((uint64_t*)$gdtr)[12]"] +[380.299588, "o", "\r\n"] +[380.320761, "o", "(gdb) "] +[382.691282, "o", "p"] +[383.391885, "o", "\b\u001b[K"] +[384.142072, "o", "#"] +[384.312701, "o", " "] +[384.457712, "o", "d"] +[384.555394, "o", "e"] +[384.722997, "o", "c"] +[384.799253, "o", "o"] +[384.920935, "o", "d"] +[384.993824, "o", "e"] +[385.075237, "o", " "] +[385.230561, "o", "t"] +[385.306128, "o", "h"] +[385.402699, "o", "e"] +[385.524367, "o", " "] +[385.683035, "o", "b"] +[385.742431, "o", "a"] +[385.818729, "o", "s"] +[385.945102, "o", "e"] +[386.563467, "o", "\r\n"] +[386.563785, "o", "(gdb) "] +[387.807248, "o", "p"] +[388.007592, "o", "r"] +[388.058408, "o", "i"] +[388.11304, "o", "n"] +[388.226837, "o", "t"] +[388.33127, "o", " "] +[388.554004, "o", "/"] +[388.663181, "o", "x"] +[388.747275, "o", " "] +[400.152432, "o", "$kernel_code"] +[401.172444, "o", ">"] +[401.31128, "o", ">"] +[402.246532, "o", "3"] +[402.36536, "o", "2"] +[403.041876, "o", ")"] +[403.470546, "o", "\b"] +[403.971061, "o", "\b"] +[404.00112, "o", "\b"] +[404.031651, "o", "\b"] +[404.062697, "o", "\b"] +[404.093786, "o", "\b"] +[404.124324, "o", "\b"] +[404.154917, "o", "\b"] +[404.186219, "o", "\b"] +[404.215797, "o", "\b"] +[404.245841, "o", "\b"] +[404.277153, "o", "\b"] +[404.307534, "o", "\b"] +[404.337518, "o", "\b"] +[404.367197, "o", "\b"] +[404.39724, "o", "\b"] +[404.427051, "o", "\b"] +[404.457149, "o", "\b"] +[404.487796, "o", "\b"] +[404.518995, "o", "\b"] +[404.717726, "o", "\u001b[C"] +[404.856778, "o", "\u001b[C"] +[404.998772, "o", "\u001b[C"] +[405.714354, "o", "($kernel_code>>32)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[406.325269, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[408.028055, "o", "&"] +[409.373772, "o", "0"] +[409.514542, "o", "x"] +[409.995675, "o", "F"] +[410.205691, "o", "F"] +[410.76551, "o", "0"] +[411.008743, "o", "0"] +[411.636305, "o", 
"0"] +[411.810194, "o", "0"] +[412.647261, "o", "0"] +[412.820582, "o", "0"] +[413.342493, "o", ")"] +[413.718701, "o", "\b"] +[414.218638, "o", "\b"] +[414.248087, "o", "\b"] +[414.278658, "o", "\b"] +[414.309504, "o", "\b"] +[414.339242, "o", "\b"] +[414.369913, "o", "\b"] +[414.401266, "o", "\b"] +[414.431457, "o", "\b"] +[414.462205, "o", "\b"] +[414.492336, "o", "\b"] +[414.522837, "o", "\b"] +[414.553148, "o", "\b"] +[414.583161, "o", "\b"] +[414.614069, "o", "\b"] +[414.644562, "o", "\b"] +[414.675091, "o", "\b"] +[414.706454, "o", "\b"] +[414.737365, "o", "\b"] +[414.767321, "o", "\b"] +[414.797448, "o", "\b"] +[414.82805, "o", "\b"] +[414.858649, "o", "\b"] +[414.888968, "o", "\b"] +[414.918934, "o", "\b"] +[414.949273, "o", "\b"] +[414.979232, "o", "\b"] +[415.009292, "o", "\b"] +[415.039545, "o", "\b"] +[415.195848, "o", "\b"] +[415.379056, "o", "\b"] +[415.654254, "o", "\u001b[C"] +[416.120906, "o", "\u001b[C($kernel_code>>32)&0xFF000000)\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[416.670056, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[417.156581, "o", "|"] +[418.078121, "o", "("] +[418.078121, "o", "("] +[418.501176, "o", "$"] +[419.061164, "o", "k"] +[419.257984, "o", "e"] +[419.35113, "o", "r"] +[419.389884, "o", "n"] +[419.462042, "o", "e"] +[419.567743, "o", "l"] +[419.859026, "o", "_"] +[420.010047, "o", "c"] +[420.089005, "o", "o"] +[420.193463, "o", "d"] +[420.274683, "o", "e"] +[423.236442, "o", ">"] +[423.370439, "o", ">"] +[425.001308, "o", "1"] +[425.061638, "o", "6"] +[426.234482, "o", ")"] +[427.499144, "o", "&"] +[428.572962, "o", "0"] +[428.702492, "o", "x"] +[429.690473, "o", "0"] +[429.827397, "o", "0"] +[430.421525, "o", "F"] +[430.556561, 
"o", "F"] +[431.06108, "o", "F"] +[431.192942, "o", "F"] +[431.550652, "o", "F"] +[431.692892, "o", "F"] +[432.118055, "o", ")"] +[433.280631, "o", "\r\n"] +[493.456846, "o", "$9 = 0x0\r\n(gdb) "] +[499.686291, "o", "#"] +[499.879719, "o", " "] +[500.923126, "o", "b"] +[500.981395, "o", "a"] +[501.053439, "o", "s"] +[501.206446, "o", "e"] +[501.347024, "o", " "] +[501.498974, "o", "i"] +[501.603969, "o", "s"] +[501.939405, "o", " "] +[502.084015, "o", "0"] +[502.685399, "o", "\r\n"] +[502.685665, "o", "(gdb) "] +[505.05193, "o", "#"] +[506.321549, "o", " "] +[506.981152, "o", "d"] +[507.072632, "o", "e"] +[507.249979, "o", "c"] +[507.287012, "o", "o"] +[507.464576, "o", "d"] +[507.54832, "o", "e"] +[507.691229, "o", " "] +[507.964173, "o", "t"] +[508.11861, "o", "h"] +[508.267776, "o", "e"] +[508.361951, "o", " "] +[508.80405, "o", "p"] +[508.905761, "o", "r"] +[509.016474, "o", "i"] +[509.158256, "o", "v"] +[509.217248, "o", "i"] +[509.408068, "o", "l"] +[509.508072, "o", "e"] +[509.914455, "o", "g"] +[509.994915, "o", "e"] +[511.133299, "o", "\b"] +[511.633839, "o", "\b"] +[511.664354, "o", "\b"] +[511.694443, "o", "\b"] +[511.725474, "o", "\b"] +[511.756119, "o", "\b"] +[511.786986, "o", "\b"] +[511.818226, "o", "\b"] +[512.015711, "o", "\b"] +[512.25971, "o", "rprivilege\b\b\b\b\b\b\b\b\b"] +[512.39865, "o", "qprivilege\b\b\b\b\b\b\b\b\b"] +[512.600873, "o", "uprivilege\b\b\b\b\b\b\b\b\b"] +[512.651473, "o", "iprivilege\b\b\b\b\b\b\b\b\b"] +[512.754197, "o", "rprivilege\b\b\b\b\b\b\b\b\b"] +[512.836059, "o", "eprivilege\b\b\b\b\b\b\b\b\b"] +[513.846698, "o", "\b\u001b[1Pprivilege\b\b\b\b\b\b\b\b\b"] +[513.976248, "o", "\b\u001b[1Pprivilege\b\b\b\b\b\b\b\b\b"] +[514.095038, "o", "\b\u001b[1Pprivilege\b\b\b\b\b\b\b\b\b"] +[514.217603, "o", "\b\u001b[1Pprivilege\b\b\b\b\b\b\b\b\b"] +[514.348636, "o", "\b\u001b[1Pprivilege\b\b\b\b\b\b\b\b\b"] +[514.462555, "o", "eprivilege\b\b\b\b\b\b\b\b\b"] +[514.684859, "o", "qprivilege\b\b\b\b\b\b\b\b\b"] +[514.871245, "o", 
"uprivilege\b\b\b\b\b\b\b\b\b"] +[514.908025, "o", "iprivilege\b\b\b\b\b\b\b\b\b"] +[514.972336, "o", "rprivilege\b\b\b\b\b\b\b\b\b"] +[515.040145, "o", "eprivilege\b\b\b\b\b\b\b\b\b"] +[515.195378, "o", "dprivilege\b\b\b\b\b\b\b\b\b"] +[515.274181, "o", " privilege\b\b\b\b\b\b\b\b\b"] +[515.57074, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[516.018703, "o", " "] +[516.187684, "o", "l"] +[516.246874, "o", "e"] +[516.472241, "o", "v"] +[516.555511, "o", "e"] +[516.581353, "o", "l"] +[517.033643, "o", "\r\n"] +[517.033753, "o", "(gdb) "] +[518.250999, "o", "p"] +[518.390471, "o", "r"] +[518.47528, "o", "i"] +[518.539113, "o", "n"] +[518.59094, "o", "t"] +[518.697485, "o", " "] +[518.870622, "o", "/"] +[518.974887, "o", "x"] +[519.117062, "o", " "] +[541.606002, "o", "$"] +[544.466982, "o", "k"] +[544.572664, "o", "e"] +[544.632867, "o", "r"] +[544.722191, "o", "n"] +[544.773134, "o", "e"] +[544.907561, "o", "l"] +[545.179853, "o", "_"] +[545.326522, "o", "c"] +[545.386052, "o", "o"] +[545.50024, "o", "d"] +[545.567089, "o", "e"] +[545.90589, "o", ">"] +[546.035073, "o", ">"] +[546.773187, "o", "4"] +[546.97104, "o", "5"] +[547.369708, "o", ")"] +[547.761588, "o", "\b\b\b"] +[548.152916, "o", "\b\b\b\b\b\b"] +[548.498199, "o", "\b\b\b\b\b\b\b"] +[549.243467, "o", "\b"] +[549.640345, "o", "($kernel_code>>45)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[550.024338, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[550.898538, "o", "&"] +[551.862487, "o", "3"] +[552.10327, "o", "\r\n"] +[552.103562, "o", "$10 = 0x0\r\n(gdb) "] +[554.033804, "o", "#"] +[554.232669, "o", " "] +[554.419128, "o", "k"] +[554.519684, "o", "e"] +[555.554111, "o", "r"] +[556.306276, "o", "n"] +[556.564753, "o", "e"] +[556.649777, "o", "l"] +[556.812466, "o", " "] +[557.304025, "o", "p"] +[557.473156, "o", "r"] +[557.570501, "o", "i"] +[557.787916, "o", "v"] 
+[557.846584, "o", "i"] +[558.020526, "o", "l"] +[558.087162, "o", "e"] +[558.286556, "o", "g"] +[558.375973, "o", "e"] +[559.006899, "o", "\b\u001b[K"] +[559.154619, "o", "\b\u001b[K"] +[559.862659, "o", "e"] +[560.565269, "o", "\r\n"] +[560.565382, "o", "(gdb) "] +[566.281427, "o", "#"] +[566.60638, "o", " "] +[566.88704, "o", "n"] +[566.979938, "o", "o"] +[567.097734, "o", "w"] +[567.217521, "o", " "] +[567.40312, "o", "l"] +[567.48597, "o", "e"] +[567.691523, "o", "t"] +[567.944728, "o", "s"] +[568.133886, "o", " "] +[568.530377, "o", "d"] +[568.722282, "o", " "] +[569.182034, "o", "\b\u001b[K"] +[569.330181, "o", "o"] +[569.464696, "o", " "] +[569.582842, "o", "t"] +[569.71244, "o", "h"] +[569.775485, "o", "e"] +[569.904672, "o", " "] +[570.06808, "o", "s"] +[570.113381, "o", "a"] +[570.267076, "o", "m"] +[570.401127, "o", "e"] +[570.54668, "o", " "] +[571.537724, "o", "f"] +[571.657201, "o", "o"] +[571.830889, "o", "r"] +[571.931437, "o", " "] +[573.275644, "o", "u"] +[573.370696, "o", "s"] +[573.478114, "o", "e"] +[573.547114, "o", "r"] +[573.753181, "o", "s"] +[573.888213, "o", "p"] +[573.935344, "o", "a"] +[574.131183, "o", "c"] +[574.286447, "o", "e"] +[575.340632, "o", "\r\n"] +[575.340777, "o", "(gdb) "] +[575.738126, "o", "#"] +[575.938845, "o", " "] +[576.16047, "o", "f"] +[576.290807, "o", "i"] +[576.374712, "o", "r"] +[576.561113, "o", "s"] +[576.696249, "o", "t"] +[576.792918, "o", " "] +[581.320765, "o", "s"] +[581.435293, "o", "e"] +[581.551576, "o", "t"] +[581.69328, "o", "u"] +[581.77331, "o", "p"] +[582.019538, "o", " "] +[582.184776, "o", "a"] +[582.341777, "o", " "] +[582.940185, "o", "p"] +[583.323301, "o", "r"] +[583.369325, "o", "e"] +[583.536369, "o", "n"] +[584.034225, "o", "\b\u001b[K"] +[584.16717, "o", "\b\u001b[K"] +[584.29082, "o", "\b\u001b[K"] +[584.411253, "o", "\b\u001b[K"] +[584.683076, "o", "b"] +[584.765623, "o", "r"] +[584.845741, "o", "e"] +[584.922373, "o", "a"] +[584.981172, "o", "k"] +[585.213372, "o", "p"] 
+[585.312638, "o", "o"] +[585.50566, "o", "i"] +[585.538534, "o", "n"] +[585.679473, "o", "t"] +[585.76966, "o", " "] +[586.004301, "o", "t"] +[586.134879, "o", "o"] +[586.349202, "o", " "] +[586.655069, "o", "a"] +[586.776202, "o", " "] +[586.989063, "o", "s"] +[587.158287, "o", "y"] +[587.231328, "o", "s"] +[587.430585, "o", "t"] +[587.481228, "o", "e"] +[587.580052, "o", "m"] +[587.651081, "o", " "] +[588.604948, "o", "\b\u001b[K"] +[589.104855, "o", "\b\u001b[K"] +[589.135382, "o", "\b\u001b[K"] +[589.165688, "o", "\b\u001b[K"] +[589.197272, "o", "\b\u001b[K"] +[589.227213, "o", "\b\u001b[K"] +[589.258689, "o", "\b\u001b[K"] +[589.289664, "o", "\b\u001b[K"] +[589.318764, "o", "\b\u001b[K"] +[589.348938, "o", "\b\u001b[K"] +[589.844186, "o", " "] +[589.954736, "o", "t"] +[590.043756, "o", "h"] +[590.136384, "o", "e"] +[590.207131, "o", " "] +[590.312478, "o", "s"] +[590.426546, "o", "y"] +[590.468176, "o", "s"] +[590.614861, "o", "t"] +[590.700094, "o", "e"] +[591.025033, "o", "l"] +[592.076639, "o", "\b\u001b[K"] +[592.265751, "o", "m"] +[592.354996, "o", " "] +[592.477158, "o", "c"] +[592.527122, "o", "a"] +[592.591157, "o", "l"] +[592.688401, "o", "l"] +[592.749225, "o", " "] +[592.888391, "o", "e"] +[592.997816, "o", "n"] +[593.106259, "o", "t"] +[593.286703, "o", "r"] +[593.354202, "o", "y"] +[593.592532, "o", "\r\n"] +[593.592811, "o", "(gdb) "] +[594.298864, "o", "b"] +[594.370713, "o", "r"] +[594.483049, "o", "e"] +[594.563026, "o", "a"] +[594.63206, "o", "k"] +[594.734457, "o", " "] +[595.876785, "o", "e"] +[595.974998, "o", "n"] +[596.08727, "o", "t"] +[596.265929, "o", "r"] +[596.355891, "o", "y"] +[596.67381, "o", "_"] +[601.569523, "o", "\u0007"] +[601.660172, "o", "\r\nentry_32.S entry_SYSENTER_32 entry_number \r\nentry_INT80_32 entry_eip entry_stack_page \r\n(gdb) break entry_S"] +[603.152017, "o", "\b\u001b[K"] +[603.60971, "o", "I"] +[603.858406, "o", "N"] +[604.204372, "o", "T80_32 "] +[605.312061, "o", "\r\n"] +[605.363107, "o", "Breakpoint 1 
at \u001b[34m0xc15de874\u001b[m: file \u001b[32march/x86/entry/entry_32.S\u001b[m, line 1020.\r\n"] +[605.363155, "o", "(gdb) "] +[606.887754, "o", "c"] +[608.705129, "o", "\r\n"] +[608.705459, "o", "Continuing.\r\n"] +[610.971565, "o", "\r\n"] +[610.971946, "o", "Breakpoint 1, \u001b[34m0xc15de874\u001b[m in \u001b[33mentry_INT80_32\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1020\r\n"] +[610.972302, "o", "1020\t\tjmp\t.Lsysenter_flags_fixed\r\n"] +[610.972535, "o", "(gdb) "] +[616.192863, "o", "n"] +[617.72471, "o", "\r\n"] +[617.734068, "o", "1054\t\tpushl\t%eax\t\t\t/* pt_regs->orig_ax */\r\n"] +[617.734278, "o", "(gdb) "] +[619.7212, "o", "#"] +[619.906381, "o", " "] +[620.188614, "o", "s"] +[620.422791, "o", "t"] +[620.477236, "o", "e"] +[620.659615, "o", "p"] +[621.259597, "o", " "] +[621.492054, "o", "t"] +[621.59708, "o", "h"] +[621.685757, "o", "r"] +[621.736727, "o", "o"] +[621.981907, "o", "u"] +[622.322864, "o", "\b\u001b[K"] +[622.460272, "o", "\b\u001b[K"] +[622.594567, "o", "\b\u001b[K"] +[622.718254, "o", "\b\u001b[K"] +[622.947025, "o", "r"] +[623.072527, "o", "o"] +[623.203739, "o", "u"] +[623.609466, "o", "\b\u001b[K"] +[623.751747, "o", "\b\u001b[K"] +[623.853592, "o", "\b\u001b[K"] +[624.129617, "o", "h"] +[624.399671, "o", "r"] +[624.477659, "o", "o"] +[624.624296, "o", "u"] +[624.734072, "o", "g"] +[624.834819, "o", "h"] +[624.960495, "o", " "] +[625.036927, "o", "t"] +[625.128099, "o", "h"] +[625.206121, "o", "e"] +[625.324744, "o", " "] +[625.524612, "o", "s"] +[625.726291, "o", "y"] +[625.819857, "o", "s"] +[625.995635, "o", "t"] +[626.050073, "o", "e"] +[626.118718, "o", "m"] +[626.245506, "o", " "] +[626.448433, "o", "c"] +[626.490398, "o", "a"] +[626.60857, "o", "l"] +[626.748098, "o", "l"] +[626.867184, "o", " "] +[627.249677, "o", "u"] +[627.324825, "o", "n"] +[627.753751, "o", "t"] +[627.862279, "o", "i"] +[627.950531, "o", "l"] +[628.071181, "o", " "] +[628.193645, "o", "t"] +[628.325551, "o", "h"] +[628.396454, "o", 
"e"] +[628.50338, "o", " "] +[628.954182, "o", "e"] +[629.072892, "o", "n"] +[629.169954, "o", "d"] +[629.907936, "o", "\r\n"] +[629.90828, "o", "(gdb) "] +[632.365504, "o", "d"] +[632.443146, "o", "e"] +[632.536882, "o", "l"] +[632.664697, "o", " "] +[632.875456, "o", "b"] +[633.037126, "o", "e"] +[633.128086, "o", "a"] +[633.208296, "o", "k"] +[633.771397, "o", "\b\u001b[K"] +[633.909143, "o", "\b\u001b[K"] +[634.023021, "o", "\b\u001b[K"] +[634.081351, "o", "r"] +[634.166271, "o", "e"] +[634.258451, "o", "a"] +[634.315484, "o", "k"] +[637.580479, "o", "\r\n"] +[637.580605, "o", "Delete all breakpoints? (y or n) "] +[638.161286, "o", "y"] +[638.358828, "o", "\r\n"] +[638.359126, "o", "(gdb) "] +[638.816338, "o", "n"] +[639.01657, "o", "\r\n"] +[639.02752, "o", "\u001b[33mentry_INT80_32\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1056\r\n1056\t\tSAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1\t/* save rest */\r\n"] +[639.027905, "o", "(gdb) "] +[639.989246, "o", "\r\n"] +[640.089993, "o", "\u001b[33mentry_INT80_32\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1058\r\n1058\t\tmovl\t%esp, %eax\r\n(gdb) "] +[640.575852, "o", "\r\n"] +[640.584316, "o", "1059\t\tcall\tdo_int80_syscall_32\r\n"] +[640.584404, "o", "(gdb) "] +[641.020396, "o", "\r\n"] +[641.043758, "o", "1064\t\tSWITCH_TO_ENTRY_STACK\r\n"] +[641.043903, "o", "(gdb) "] +[642.078973, "o", "\r\n"] +[642.131705, "o", "\u001b[33mentry_INT80_32\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1065\r\n1065\t\tCHECK_AND_APPLY_ESPFIX\r\n(gdb) "] +[643.550621, "o", "\r\n"] +[643.569874, "o", "1068\t\tSWITCH_TO_USER_CR3 scratch_reg=%eax\r\n(gdb) "] +[645.089643, "o", "\r\n"] +[645.097692, "o", "1073\t\tRESTORE_REGS pop=4\t\t\t# skip orig_eax/error_code\r\n"] +[645.09795, "o", "(gdb) "] +[646.112248, "o", "\r\n"] +[646.177685, "o", "\u001b[33mentry_INT80_32\u001b[m () at \u001b[32march/x86/entry/entry_32.S\u001b[m:1080\r\n1080\t\tINTERRUPT_RETURN\r\n"] +[646.177724, "o", "(gdb) "] 
+[650.665768, "o", "\r\n"] +[650.685116, "o", "\u001b[34m0x448d167d\u001b[m in \u001b[33m??\u001b[m ()\r\n"] +[650.686558, "o", "(gdb) "] +[655.046392, "o", "#"] +[655.210079, "o", " "] +[655.404159, "o", "t"] +[655.481148, "o", "h"] +[655.531965, "o", "i"] +[655.669289, "o", "s"] +[655.756962, "o", " "] +[656.023425, "o", "l"] +[656.230497, "o", "o"] +[656.350096, "o", "o"] +[656.430194, "o", "k"] +[656.543202, "o", "s"] +[656.614577, "o", " "] +[656.747584, "o", "l"] +[656.90921, "o", "i"] +[657.044202, "o", "k"] +[657.15373, "o", "e"] +[657.207156, "o", " "] +[657.321091, "o", "a"] +[657.405793, "o", " "] +[657.735833, "o", "u"] +[657.828575, "o", "s"] +[657.933278, "o", "e"] +[658.002691, "o", "r"] +[658.288285, "o", " "] +[658.450784, "o", "s"] +[658.550787, "o", "p"] +[658.610481, "o", "a"] +[658.76577, "o", "c"] +[658.84823, "o", "e"] +[658.919578, "o", " "] +[658.996235, "o", "a"] +[659.117812, "o", "d"] +[659.253844, "o", "d"] +[659.433193, "o", "r"] +[659.48534, "o", "e"] +[659.60751, "o", "s"] +[659.754908, "o", "s"] +[659.888093, "o", ","] +[659.977163, "o", " "] +[660.161783, "o", "l"] +[660.22061, "o", "e"] +[660.384788, "o", "t"] +[660.593814, "o", "s"] +[660.725078, "o", " "] +[661.244123, "o", "c"] +[661.390876, "o", "h"] +[661.490006, "o", "e"] +[661.556587, "o", "c"] +[661.641873, "o", "k"] +[661.806617, "o", " "] +[662.017518, "o", "t"] +[662.09433, "o", "h"] +[662.20504, "o", "e"] +[662.271583, "o", " "] +[662.406577, "o", "c"] +[662.496568, "o", "o"] +[662.695698, "o", "d"] +[663.60129, "o", "e"] +[663.748373, "o", " "] +[663.955006, "o", "s"] +[664.114279, "o", "e"] +[664.25419, "o", "l"] +[664.303753, "o", "e"] +[664.381874, "o", "c"] +[664.587642, "o", "t"] +[664.674974, "o", "o"] +[664.749721, "o", "r"] +[665.224982, "o", "\r\n"] +[665.225096, "o", "(gdb) "] +[665.695276, "o", "p"] +[665.792372, "o", "r"] +[665.897882, "o", "i"] +[665.94532, "o", "n"] +[666.023454, "o", "t"] +[666.0908, "o", " "] +[666.432583, "o", "x"] +[666.832639, "o", 
"\b\u001b[K"] +[667.03479, "o", "/"] +[667.144708, "o", "x"] +[667.249841, "o", " "] +[667.530074, "o", "$"] +[667.849312, "o", "c"] +[667.93295, "o", "s"] +[668.402085, "o", "\r\n"] +[668.403271, "o", "$11 = 0x73\r\n"] +[668.404009, "o", "(gdb) "] +[672.252423, "o", "#"] +[673.194564, "o", " "] +[673.675156, "o", "t"] +[673.760362, "o", "h"] +[673.882635, "o", "e"] +[673.994028, "o", " "] +[674.513812, "o", "p"] +[674.706893, "o", "r"] +[674.804439, "o", "i"] +[674.94746, "o", "v"] +[675.010894, "o", "i"] +[675.188625, "o", "l"] +[675.265228, "o", "e"] +[675.511041, "o", "g"] +[675.587286, "o", "e"] +[675.739396, "o", " "] +[675.935171, "o", "l"] +[676.005954, "o", "e"] +[676.202384, "o", "v"] +[676.294477, "o", "e"] +[676.354083, "o", "l"] +[676.485263, "o", " "] +[676.602646, "o", "i"] +[676.704354, "o", "s"] +[676.784185, "o", " "] +[676.981088, "o", "3"] +[677.195168, "o", ","] +[677.326062, "o", " "] +[677.604308, "o", "s"] +[677.666906, "o", "o"] +[677.785998, "o", " "] +[678.506076, "o", "i"] +[678.655711, "o", "n"] +[678.780883, "o", "e"] +[678.940158, "o", "e"] +[679.044873, "o", "d"] +[679.222637, "o", " "] +[679.56275, "o", "\b\u001b[K"] +[679.698968, "o", "\b\u001b[K"] +[679.830282, "o", "\b\u001b[K"] +[679.983681, "o", "\b\u001b[K"] +[680.311095, "o", "d"] +[680.410929, "o", "e"] +[680.580716, "o", "e"] +[680.692035, "o", "d"] +[680.797568, "o", " "] +[680.979638, "o", "w"] +[681.042251, "o", "e"] +[681.132403, "o", " "] +[681.247785, "o", "a"] +[681.393117, "o", "r"] +[681.475054, "o", "e"] +[681.54512, "o", " "] +[682.162527, "o", "i"] +[682.280131, "o", "n"] +[682.527043, "o", "u"] +[682.867049, "o", "\b\u001b[K"] +[682.971752, "o", " "] +[683.475284, "o", "\b\u001b[K"] +[683.616692, "o", "\b\u001b[K"] +[683.74114, "o", "\b\u001b[K"] +[683.998681, "o", "r"] +[684.057732, "o", "u"] +[684.122526, "o", "n"] +[684.271564, "o", "i"] +[684.326317, "o", "n"] +[684.689606, "o", "\b\u001b[K"] +[684.828735, "o", "\b\u001b[K"] +[685.172894, "o", "n"] 
+[685.301489, "o", "i"] +[685.385224, "o", "n"] +[685.524585, "o", "g"] +[685.652293, "o", " "] +[685.800678, "o", "i"] +[685.887831, "o", "n"] +[685.944332, "o", " "] +[686.069441, "o", "u"] +[686.162036, "o", "s"] +[686.243565, "o", "e"] +[686.317807, "o", "r"] +[686.376218, "o", " "] +[686.540215, "o", "m"] +[686.598979, "o", "o"] +[686.625005, "o", "d"] +[686.723705, "o", "e"] +[686.778902, "o", "\r\n"] +[686.779014, "o", "(gdb) "] +[692.53241, "o", "#"] +[692.721453, "o", " "] +[692.892056, "o", "l"] +[693.024663, "o", "e"] +[693.193954, "o", "t"] +[693.371269, "o", "s"] +[693.597078, "o", "g"] +[693.885546, "o", "\b\u001b[K"] +[693.978727, "o", " "] +[694.049596, "o", "g"] +[694.108902, "o", "e"] +[694.247468, "o", "t"] +[694.345634, "o", " "] +[694.511566, "o", "t"] +[694.587774, "o", "h"] +[694.6909, "o", "e"] +[694.771112, "o", " "] +[694.863811, "o", "s"] +[694.977826, "o", "e"] +[695.074086, "o", "l"] +[695.155131, "o", "e"] +[695.229863, "o", "c"] +[695.442999, "o", "t"] +[695.821135, "o", "o"] +[695.950336, "o", "r"] +[696.053426, "o", "\r\n"] +[696.053731, "o", "(gdb) "] +[696.74081, "o", "p"] +[696.882904, "o", "r"] +[696.95492, "o", "i"] +[697.03238, "o", "n"] +[697.104735, "o", "t"] +[697.202097, "o", " "] +[697.420614, "o", "."] +[697.660736, "o", "x"] +[698.088723, "o", "\b\u001b[K"] +[698.214544, "o", "\b\u001b[K"] +[698.416446, "o", "/"] +[698.57942, "o", "x"] +[698.684619, "o", " "] +[699.479828, "o", "$"] +[699.980736, "o", "c"] +[700.065109, "o", "s"] +[700.396996, "o", ">"] +[700.526326, "o", ">"] +[700.713679, "o", "3"] +[700.79379, "o", "2"] +[701.128706, "o", "\b\u001b[K"] +[701.345675, "o", "\b\u001b[K"] +[701.604745, "o", "3"] +[702.59867, "o", "\r\n"] +[702.599922, "o", "$12 = 0xe\r\n"] +[702.600842, "o", "(gdb) "] +[705.06673, "o", "\u0007"] +[705.324321, "o", "print /x $cs>>3"] +[705.503241, "o", "\b"] +[706.003691, "o", "\b"] +[706.034303, "o", "\b"] +[706.064471, "o", "\b"] +[706.095291, "o", "\b"] +[706.125113, "o", "\b"] 
+[706.155725, "o", "\b"] +[706.523474, "o", "\u001b[C"] +[706.779876, "o", "\b\u001b[1P$cs>>3\b\b\b\b\b\b"] +[706.91489, "o", "\b\u001b[1P$cs>>3\b\b\b\b\b\b"] +[707.042101, "o", "\b\u001b[1P$cs>>3\b\b\b\b\b\b"] +[707.262476, "o", "\r\n"] +[707.263867, "o", "$13 = 14\r\n"] +[707.264707, "o", "(gdb) "] +[718.972351, "o", "s"] +[719.055223, "o", "e"] +[719.204699, "o", "t"] +[719.38757, "o", " "] +[719.750857, "o", "$"] +[721.845151, "o", "u"] +[721.971635, "o", "s"] +[722.053169, "o", "e"] +[722.146473, "o", "r"] +[722.392609, "o", "_"] +[729.373498, "o", "c"] +[729.521886, "o", "o"] +[729.630095, "o", "d"] +[729.712985, "o", "e"] +[730.34708, "o", "="] +[742.363689, "o", "((uint64_t*)$gdtr)["] +[743.594954, "o", "1"] +[744.208712, "o", "4"] +[745.162637, "o", "]"] +[746.699323, "o", "\r\n"] +[746.707477, "o", "(gdb) "] +[747.302209, "o", "p"] +[747.409252, "o", "r"] +[747.545136, "o", "i"] +[747.603175, "o", "n"] +[747.629656, "o", "t"] +[747.765806, "o", " "] +[747.957777, "o", "/"] +[748.073233, "o", "z"] +[748.193756, "o", " "] +[748.536198, "o", "$"] +[748.849674, "o", "u"] +[748.981324, "o", "s"] +[749.09022, "o", "e"] +[749.141073, "o", "r"] +[749.742629, "o", "_"] +[750.000935, "o", "c"] +[750.098552, "o", "o"] +[750.20873, "o", "d"] +[750.312852, "o", "e"] +[750.456634, "o", "\r\n"] +[750.457831, "o", "$14 = 0x00cffa000000ffff\r\n"] +[750.458525, "o", "(gdb) "] +[760.922271, "o", "#"] +[761.152125, "o", " "] +[761.321912, "o", "l"] +[761.483755, "o", "e"] +[761.677907, "o", "t"] +[761.977657, "o", "s"] +[762.451478, "o", " "] +[763.858006, "o", "p"] +[764.048606, "o", "r"] +[764.12853, "o", "i"] +[764.205234, "o", "n"] +[764.281491, "o", "t"] +[764.423725, "o", " "] +[764.529463, "o", "t"] +[764.642091, "o", "h"] +[764.76313, "o", "e"] +[764.906969, "o", " "] +[766.022913, "o", "b"] +[766.141544, "o", "a"] +[766.245647, "o", "s"] +[766.449316, "o", "e"] +[768.319525, "o", "\r\n"] +[768.319645, "o", "(gdb) "] +[809.07354, "o", "print /x 
(($user_code>>32)&0xFF000000)|(($kernel_code>>16)&0x00FFFFFF)\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[809.256141, "o", "\u001b[C"] +[809.75659, "o", "\u001b[C"] +[809.786818, "o", "\u001b[C"] +[809.817626, "o", "\u001b[C"] +[809.848195, "o", "\u001b[C"] +[809.878997, "o", "\u001b[C"] +[809.909101, "o", "\u001b[C"] +[809.940364, "o", "\u001b[C"] +[809.970702, "o", "\u001b[C"] +[810.001365, "o", "\u001b[C"] +[810.031559, "o", "\u001b[C"] +[810.062018, "o", "\u001b[C"] +[810.093057, "o", "\u001b[C"] +[810.123449, "o", "\u001b[C"] +[810.153727, "o", "\u001b[C"] +[810.184235, "o", "\u001b[C"] +[810.214952, "o", "\u001b[C"] +[810.246219, "o", "\u001b[C"] +[810.276599, "o", "\u001b[C"] +[810.306915, "o", "\u001b[C"] +[810.336951, "o", "\u001b[C"] +[810.367283, "o", "\u001b[C"] +[810.397779, "o", "\u001b[C"] +[810.428429, "o", "\u001b[C"] +[810.458566, "o", "\u001b[C"] +[810.488834, "o", "\u001b[C"] +[810.519269, "o", "\u001b[C"] +[810.549961, "o", "\u001b[C"] +[810.580356, "o", "\u001b[C"] +[810.611188, "o", "\u001b[C"] +[810.641482, "o", "\u001b[C"] +[810.671188, "o", "\u001b[C"] +[810.702462, "o", "\u001b[C"] +[810.975545, "o", "\b"] +[811.264615, "o", "\b\u001b[1P_code>>16)&0x00FFFFFF)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[811.40386, "o", "\b\u001b[1P_code>>16)&0x00FFFFFF)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[811.523682, "o", "\b\u001b[1P_code>>16)&0x00FFFFFF)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[811.647806, "o", "\b\u001b[1P_code>>16)&0x00FFFFFF)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[811.784493, "o", "\b\u001b[1P_code>>16)&0x00FFFFFF)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[811.945961, "o", "\b\u001b[1P_code>>16)&0x00FFFFFF)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[812.551148, "o", "u_code>>16)&0x00FFFFFF)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[812.666049, "o", 
"s_code>>16)&0x00FFFFFF)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[812.733865, "o", "e_code>>16)&0x00FFFFFF)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[812.804219, "o", "r_code>>16)&0x00FFFFFF)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[813.471435, "o", "\r\n"] +[813.472787, "o", "$15 = 0x0\r\n"] +[813.473756, "o", "(gdb) "] +[815.049124, "o", "#"] +[815.227758, "o", " "] +[816.113222, "o", "a"] +[816.244562, "o", "n"] +[816.316434, "o", "d"] +[816.425143, "o", " "] +[816.637818, "o", "t"] +[816.711367, "o", "h"] +[816.826612, "o", "e"] +[816.890874, "o", " "] +[817.064052, "o", "l"] +[817.240246, "o", "i"] +[817.356865, "o", "m"] +[817.432586, "o", "i"] +[817.56418, "o", "t"] +[818.009451, "o", "\r\n"] +[818.009561, "o", "(gdb) "] +[818.432237, "o", "p"] +[818.564952, "o", "r"] +[818.698167, "o", "i"] +[818.724308, "o", "n"] +[818.826911, "o", "t"] +[818.919965, "o", " "] +[819.207993, "o", "/"] +[819.381762, "o", "x"] +[819.508217, "o", " "] +[820.316542, "o", "$"] +[820.975916, "o", "u"] +[821.115032, "o", "s"] +[821.207129, "o", "e"] +[821.270953, "o", "r"] +[821.414288, "o", "_"] +[821.602876, "o", "c"] +[821.687005, "o", "o"] +[821.816962, "o", "d"] +[821.884079, "o", "e"] +[823.356186, "o", "&"] +[824.379479, "o", "0"] +[824.519435, "o", "x"] +[825.853806, "o", "f"] +[826.065333, "o", "f"] +[826.251408, "o", "f"] +[826.528401, "o", "f"] +[827.075458, "o", ")"] +[827.392703, "o", "\b"] +[827.893571, "o", "\b"] +[827.923588, "o", "\b"] +[827.953933, "o", "\b"] +[827.984307, "o", "\b"] +[828.014987, "o", "\b"] +[828.046374, "o", "\b"] +[828.076921, "o", "\b"] +[828.10759, "o", "\b"] +[828.137907, "o", "\b"] +[828.1682, "o", "\b"] +[828.198418, "o", "\b"] +[828.229355, "o", "\b"] +[828.259187, "o", "\b"] +[828.289623, "o", "\b"] +[828.320693, "o", "\b"] +[828.35114, "o", "\b"] +[828.382249, "o", "\b"] +[829.085558, "o", 
"($user_code&0xffff)\r\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[829.708728, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[830.400685, "o", "*"] +[830.93294, "o", "4"] +[831.02888, "o", "0"] +[831.20694, "o", "9"] +[831.583008, "o", "5"] +[832.196999, "o", "\b\u001b[K"] +[832.544697, "o", "6"] +[833.211351, "o", "\r\n"] +[833.21263, "o", "$16 = 0xffff000\r\n"] +[833.213594, "o", "(gdb) "] +[836.747221, "o", "#"] +[836.966915, "o", " "] +[837.122971, "o", "l"] +[837.264712, "o", "i"] +[837.415757, "o", "k"] +[837.487956, "o", "e"] +[837.582304, "o", " "] +[837.796811, "o", "b"] +[837.86537, "o", "e"] +[838.009048, "o", "f"] +[838.13558, "o", "o"] +[838.276445, "o", "r"] +[838.322393, "o", "e"] +[838.696696, "o", ","] +[838.845816, "o", " "] +[839.780549, "o", "b"] +[839.849384, "o", "a"] +[839.927552, "o", "s"] +[840.041133, "o", "e"] +[840.157198, "o", " "] +[840.376833, "o", "="] +[840.582513, "o", " "] +[840.751071, "o", "0"] +[840.957989, "o", " "] +[841.359805, "o", "\b\u001b[K"] +[841.654447, "o", " "] +[842.09277, "o", "\b\u001b[K"] +[842.473777, "o", ","] +[842.5702, "o", " "] +[842.730569, "o", "l"] +[842.881001, "o", "i"] +[843.046392, "o", "m"] +[843.149457, "o", "i"] +[843.28806, "o", "t"] +[843.603712, "o", "="] +[844.085459, "o", "4"] +[844.905693, "o", "G"] +[845.388067, "o", "\r\n(gdb) "] +[846.021339, "o", "#"] +[846.1873, "o", " "] +[846.362737, "o", "f"] +[846.424606, "o", "i"] +[846.539695, "o", "n"] +[847.063757, "o", "a"] +[847.427324, "o", "l"] +[847.539554, "o", "l"] +[847.791174, "o", "y"] +[847.863453, "o", " "] +[848.073511, "o", "l"] +[848.146045, "o", "e"] +[848.331511, "o", "t"] +[848.502747, "o", "s"] +[848.612159, "o", " "] +[848.811446, "o", "p"] +[849.033555, "o", "i"] +[849.47643, "o", "\b\u001b[K"] +[849.592317, "o", "r"] +[849.689119, 
"o", "i"] +[849.750955, "o", "n"] +[849.865499, "o", " "] +[849.912217, "o", "t"] +[850.068171, "o", "h"] +[850.185347, "o", "t"] +[850.663395, "o", "e"] +[850.892227, "o", "\b\u001b[K"] +[851.01506, "o", "\b\u001b[K"] +[851.090625, "o", "e"] +[851.184358, "o", " "] +[852.40202, "o", "p"] +[852.541371, "o", "r"] +[852.627033, "o", "i"] +[852.852467, "o", "v"] +[853.112142, "o", "\b\u001b[K"] +[853.234241, "o", "\b\u001b[K"] +[853.353082, "o", "\b\u001b[K"] +[853.454046, "o", "r"] +[853.673116, "o", "\b\u001b[K"] +[853.791573, "o", "\b\u001b[K"] +[853.958148, "o", "r"] +[854.049767, "o", "e"] +[854.151012, "o", "q"] +[854.43396, "o", "u"] +[854.480219, "o", "i"] +[854.525939, "o", "r"] +[854.602678, "o", "e"] +[854.764555, "o", "d"] +[854.917843, "o", " "] +[855.307316, "o", "p"] +[855.579807, "o", "r"] +[855.701494, "o", "i"] +[855.941647, "o", "v"] +[856.022055, "o", "i"] +[856.206419, "o", "l"] +[856.294774, "o", "e"] +[856.534217, "o", "g"] +[856.613545, "o", "e"] +[856.970087, "o", "\r\n"] +[856.970197, "o", "(gdb) "] +[858.880707, "o", "p"] +[859.008756, "o", "r"] +[859.084916, "o", "i"] +[859.148296, "o", "n"] +[859.194996, "o", "t"] +[859.279003, "o", " "] +[859.509048, "o", "/"] +[859.653839, "o", "x"] +[859.789865, "o", " "] +[860.019894, "o", "$"] +[861.401365, "o", "u"] +[861.442956, "o", "s"] +[861.564205, "o", "e"] +[861.621157, "o", "r"] +[861.743186, "o", "_"] +[861.895258, "o", "c"] +[862.01297, "o", "o"] +[862.137427, "o", "d"] +[862.21639, "o", "e"] +[862.540725, "o", ">"] +[862.850378, "o", ">"] +[864.616721, "o", "4"] +[864.804028, "o", "5"] +[865.120143, "o", ")"] +[865.47208, "o", "\b"] +[865.9722, "o", "\b"] +[866.002112, "o", "\b"] +[866.032795, "o", "\b"] +[866.063313, "o", "\b"] +[866.093033, "o", "\b"] +[866.123273, "o", "\b"] +[866.153238, "o", "\b"] +[866.183841, "o", "\b"] +[866.214712, "o", "\b"] +[866.245027, "o", "\b"] +[866.275735, "o", "\b"] +[866.306467, "o", "\b"] +[866.569002, "o", "\b"] +[866.786537, "o", "\b"] +[867.202622, 
"o", "($user_code>>45)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[867.699393, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[868.510852, "o", "&"] +[869.114958, "o", "3"] +[869.282934, "o", "\r\n"] +[869.284128, "o", "$17 = 0x3\r\n"] +[869.28494, "o", "(gdb) "] +[870.908449, "o", "#"] +[871.210682, "o", " "] +[872.957141, "o", "a"] +[873.042207, "o", "s"] +[873.160801, "o", " "] +[873.311215, "o", "e"] +[873.710163, "o", "x"] +[873.882767, "o", "p"] +[873.963709, "o", "e"] +[874.171292, "o", "c"] +[874.382806, "o", "t"] +[874.467515, "o", "e"] +[874.657769, "o", "d"] +[874.790228, "o", ","] +[874.907512, "o", " "] +[875.097689, "o", "p"] +[875.201934, "o", "r"] +[875.282314, "o", "i"] +[875.630355, "o", "v"] +[875.863496, "o", "i"] +[876.094037, "o", "l"] +[876.277817, "o", "e"] +[876.501288, "o", "g"] +[876.578121, "o", "e"] +[876.773456, "o", " "] +[876.998859, "o", "3"] +[877.119419, "o", " "] +[877.673306, "o", "="] +[877.818837, "o", " "] +[878.24644, "o", "u"] +[878.372011, "o", "s"] +[878.430652, "o", "e"] +[878.495114, "o", "r"] +[878.622827, "o", " "] +[879.099481, "o", "o"] +[879.154198, "o", "d"] +[879.223305, "o", "e"] +[879.476677, "o", "\b\u001b[K"] +[879.606628, "o", "\b\u001b[K"] +[879.778496, "o", "\b\u001b[K"] +[880.000743, "o", "m"] +[880.050628, "o", "o"] +[880.127009, "o", "d"] +[880.191324, "o", "e"] +[880.418045, "o", "\r\n"] +[880.418349, "o", "(gdb) "] diff --git a/Documentation/teaching/res/sifive_uart.c b/Documentation/teaching/res/sifive_uart.c new file mode 100644 index 00000000000000..fe1266678932de --- /dev/null +++ b/Documentation/teaching/res/sifive_uart.c @@ -0,0 +1,192 @@ +/* + * QEMU model of the UART on the SiFive E300 and U500 series SOCs. 
+ * + * Copyright (c) 2016 Stefan O'Rear + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "chardev/char.h" +#include "chardev/char-fe.h" +#include "hw/irq.h" +#include "hw/char/sifive_uart.h" + +/* + * Not yet implemented: + * + * Transmit FIFO using "qemu/fifo8.h" + */ + +/* Returns the state of the IP (interrupt pending) register */ +static uint64_t uart_ip(SiFiveUARTState *s) +{ + uint64_t ret = 0; + + uint64_t txcnt = SIFIVE_UART_GET_TXCNT(s->txctrl); + uint64_t rxcnt = SIFIVE_UART_GET_RXCNT(s->rxctrl); + + if (txcnt != 0) { + ret |= SIFIVE_UART_IP_TXWM; + } + if (s->rx_fifo_len > rxcnt) { + ret |= SIFIVE_UART_IP_RXWM; + } + + return ret; +} + +static void update_irq(SiFiveUARTState *s) +{ + int cond = 0; + if ((s->ie & SIFIVE_UART_IE_TXWM) || + ((s->ie & SIFIVE_UART_IE_RXWM) && s->rx_fifo_len)) { + cond = 1; + } + if (cond) { + qemu_irq_raise(s->irq); + } else { + qemu_irq_lower(s->irq); + } +} + +static uint64_t +uart_read(void *opaque, hwaddr addr, unsigned int size) +{ + SiFiveUARTState *s = opaque; + unsigned char r; + switch (addr) { + case SIFIVE_UART_RXFIFO: + if (s->rx_fifo_len) { + r = s->rx_fifo[0]; + memmove(s->rx_fifo, s->rx_fifo + 1, s->rx_fifo_len - 1); + s->rx_fifo_len--; + qemu_chr_fe_accept_input(&s->chr); + update_irq(s); + return r; + } + return 0x80000000; + + case SIFIVE_UART_TXFIFO: + return 0; /* Should check tx fifo */ + 
case SIFIVE_UART_IE: + return s->ie; + case SIFIVE_UART_IP: + return uart_ip(s); + case SIFIVE_UART_TXCTRL: + return s->txctrl; + case SIFIVE_UART_RXCTRL: + return s->rxctrl; + case SIFIVE_UART_DIV: + return s->div; + } + + qemu_log_mask(LOG_GUEST_ERROR, "%s: bad read: addr=0x%x\n", + __func__, (int)addr); + return 0; +} + +static void +uart_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + SiFiveUARTState *s = opaque; + uint32_t value = val64; + unsigned char ch = value; + + switch (addr) { + case SIFIVE_UART_TXFIFO: + qemu_chr_fe_write(&s->chr, &ch, 1); + update_irq(s); + return; + case SIFIVE_UART_IE: + s->ie = val64; + update_irq(s); + return; + case SIFIVE_UART_TXCTRL: + s->txctrl = val64; + return; + case SIFIVE_UART_RXCTRL: + s->rxctrl = val64; + return; + case SIFIVE_UART_DIV: + s->div = val64; + return; + } + qemu_log_mask(LOG_GUEST_ERROR, "%s: bad write: addr=0x%x v=0x%x\n", + __func__, (int)addr, (int)value); +} + +static const MemoryRegionOps uart_ops = { + .read = uart_read, + .write = uart_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4 + } +}; + +static void uart_rx(void *opaque, const uint8_t *buf, int size) +{ + SiFiveUARTState *s = opaque; + + /* Got a byte. */ + if (s->rx_fifo_len >= sizeof(s->rx_fifo)) { + printf("WARNING: UART dropped char.\n"); + return; + } + s->rx_fifo[s->rx_fifo_len++] = *buf; + + update_irq(s); +} + +static int uart_can_rx(void *opaque) +{ + SiFiveUARTState *s = opaque; + + return s->rx_fifo_len < sizeof(s->rx_fifo); +} + +static void uart_event(void *opaque, QEMUChrEvent event) +{ +} + +static int uart_be_change(void *opaque) +{ + SiFiveUARTState *s = opaque; + + qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, uart_event, + uart_be_change, s, NULL, true); + + return 0; +} + +/* + * Create UART device. 
+ */ +SiFiveUARTState *sifive_uart_create(MemoryRegion *address_space, hwaddr base, + Chardev *chr, qemu_irq irq) +{ + SiFiveUARTState *s = g_malloc0(sizeof(SiFiveUARTState)); + s->irq = irq; + qemu_chr_fe_init(&s->chr, chr, &error_abort); + qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, uart_event, + uart_be_change, s, NULL, true); + memory_region_init_io(&s->mmio, NULL, &uart_ops, s, + TYPE_SIFIVE_UART, SIFIVE_UART_MAX); + memory_region_add_subregion(address_space, base, &s->mmio); + return s; +} diff --git a/Documentation/teaching/res/skb.png b/Documentation/teaching/res/skb.png new file mode 100755 index 00000000000000..db956dc143670b Binary files /dev/null and b/Documentation/teaching/res/skb.png differ diff --git a/Documentation/teaching/res/slab-coloring.png b/Documentation/teaching/res/slab-coloring.png new file mode 100644 index 00000000000000..1391ce55d79816 Binary files /dev/null and b/Documentation/teaching/res/slab-coloring.png differ diff --git a/Documentation/teaching/res/slab-detailed-arch.png b/Documentation/teaching/res/slab-detailed-arch.png new file mode 100644 index 00000000000000..77a0cc811b79ad Binary files /dev/null and b/Documentation/teaching/res/slab-detailed-arch.png differ diff --git a/Documentation/teaching/res/slab-object-descriptors.png b/Documentation/teaching/res/slab-object-descriptors.png new file mode 100644 index 00000000000000..dbeab55b8d104b Binary files /dev/null and b/Documentation/teaching/res/slab-object-descriptors.png differ diff --git a/Documentation/teaching/res/slab-overview.png b/Documentation/teaching/res/slab-overview.png new file mode 100644 index 00000000000000..90086d4b0f9139 Binary files /dev/null and b/Documentation/teaching/res/slab-overview.png differ diff --git a/Documentation/teaching/res/syscalls-inspection.cast b/Documentation/teaching/res/syscalls-inspection.cast new file mode 100644 index 00000000000000..ca749a423021e3 --- /dev/null +++ b/Documentation/teaching/res/syscalls-inspection.cast @@ 
-0,0 +1,1389 @@ +{"title": "System Call Inspection", "height": 24, "idle_time_limit": 1.0, "version": 2, "env": {"SHELL": "/bin/bash", "TERM": "xterm-256color"}, "width": 80, "timestamp": 1519682642} +[0.02593, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[2.585046, "o", "#"] +[2.812131, "o", " "] +[2.94729, "o", "a"] +[3.187178, "o", "t"] +[3.308689, "o", "t"] +[3.380836, "o", "a"] +[3.587609, "o", "c"] +[3.660319, "o", "h"] +[3.74021, "o", " "] +[3.935004, "o", "g"] +[4.157892, "o", "d"] +[4.34303, "o", "b"] +[4.527084, "o", " "] +[4.711204, "o", "t"] +[4.768411, "o", "o"] +[4.85479, "o", " "] +[5.081524, "o", "V"] +[5.193867, "o", "M"] +[5.366551, "o", "\r\n"] +[5.367316, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[6.562559, "o", "m"] +[6.655394, "o", "a"] +[6.702303, "o", "k"] +[6.826912, "o", "e"] +[6.898232, "o", " "] +[7.081019, "o", "g"] +[7.184305, "o", "d"] +[7.250501, "o", "b"] +[7.464891, "o", "\r\n"] +[7.487695, "o", "gdb -ex \"target remote localhost:1234\" /home/tavi/src/linux/vmlinux\r\n"] +[7.552276, "o", "GNU gdb (Ubuntu 7.11.1-0ubuntu1~16.5) 7.11.1\r\nCopyright (C) 2016 Free Software Foundation, Inc.\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law. 
Type \"show copying\"\r\nand \"show warranty\" for details.\r\nThis GDB was configured as \"x86_64-linux-gnu\".\r\nType \"show configuration\" for configuration details.\r\nFor bug reporting instructions, please see:\r\n.\r\nFind the GDB manual and other documentation resources online at:\r\n.\r\nFor help, type \"help\".\r\nType \"apropos word\" to search for commands related to \"word\"...\r\n"] +[7.552711, "o", "Reading symbols from /home/tavi/src/linux/vmlinux..."] +[8.0237, "o", "done.\r\n"] +[8.040804, "o", "Remote debugging using localhost:1234\r\n"] +[8.049686, "o", "default_idle () at arch/x86/kernel/process.c:357\r\n"] +[8.049841, "o", "357\t}\r\n"] +[8.049944, "o", "(gdb) "] +[8.617598, "o", "b"] +[8.712276, "o", "t"] +[8.906112, "o", "\r\n"] +[8.907612, "o", "#0 default_idle () at arch/x86/kernel/process.c:357\r\n#1 0xc101fcfd in arch_cpu_idle () at arch/x86/kernel/process.c:346\r\n"] +[8.916461, "o", "#2 0xc14639f9 in default_idle_call () at kernel/sched/idle.c:98\r\n"] +[8.916818, "o", "#3 0xc107b2a5 in cpuidle_idle_call () at kernel/sched/idle.c:156\r\n#4 do_idle () at kernel/sched/idle.c:246\r\n"] +[8.923466, "o", "#5 0xc107b5b5 in cpu_startup_entry (state=)\r\n at kernel/sched/idle.c:351\r\n#6 0xc145d643 in rest_init () at init/main.c:436\r\n"] +[8.923757, "o", "#7 0xc1614acb in start_kernel () at init/main.c:716\r\n"] +[8.926458, "o", "#8 0xc161424a in i386_start_kernel () at arch/x86/kernel/head32.c:56\r\n"] +[8.926871, "o", "#9 0xc10001d3 in startup_32_smp () at arch/x86/kernel/head_32.S:363\r\n"] +[8.927813, "o", "#10 0x00000000 in ?? 
()\r\n"] +[8.928515, "o", "(gdb) "] +[9.441926, "o", "#"] +[9.610515, "o", " "] +[10.164044, "o", "V"] +[10.268664, "o", "M"] +[10.453102, "o", " "] +[10.592012, "o", "i"] +[10.687218, "o", "s"] +[10.769038, "o", " "] +[10.88914, "o", "i"] +[11.031982, "o", "d"] +[11.171022, "o", "l"] +[11.339681, "o", "e"] +[11.541285, "o", "\r\n"] +[11.541408, "o", "(gdb) "] +[13.459643, "o", "#"] +[13.604705, "o", " "] +[13.787454, "o", "l"] +[13.867483, "o", "e"] +[14.028803, "o", "t"] +[14.196721, "o", "s"] +[14.299574, "o", " "] +[14.400587, "o", "a"] +[14.678709, "o", "t"] +[15.297907, "o", "\b\u001b[K"] +[15.474776, "o", "\b\u001b[K"] +[17.314512, "o", "a"] +[17.493688, "o", "d"] +[17.640039, "o", "d"] +[17.734576, "o", " "] +[17.836868, "o", "a"] +[17.94166, "o", " "] +[18.172955, "o", "b"] +[18.231003, "o", "r"] +[18.338597, "o", "e"] +[18.389443, "o", "a"] +[18.493356, "o", "k"] +[18.697004, "o", "p"] +[18.791969, "o", "o"] +[19.002137, "o", "i"] +[19.076693, "o", "n"] +[19.247079, "o", "t"] +[19.682963, "o", " "] +[19.920143, "o", "t"] +[20.019301, "o", "o"] +[20.13914, "o", " "] +[20.298365, "o", "a"] +[20.399756, "o", " "] +[20.543224, "o", "s"] +[20.654014, "o", "y"] +[20.722395, "o", "s"] +[20.875473, "o", "t"] +[20.949965, "o", "e"] +[21.03673, "o", "m"] +[21.105939, "o", " "] +[21.199805, "o", "c"] +[21.248673, "o", "a"] +[21.33164, "o", "l"] +[21.457598, "o", "l"] +[21.662139, "o", "\r\n"] +[21.662437, "o", "(gdb) "] +[23.558939, "o", "b"] +[23.610705, "o", "r"] +[23.671647, "o", "e"] +[23.830052, "o", "\u0007ak"] +[24.613391, "o", " "] +[25.766837, "o", "s"] +[25.878063, "o", "y"] +[25.959348, "o", "s"] +[26.26539, "o", "_"] +[26.884977, "o", "d"] +[26.936127, "o", "u"] +[27.021843, "o", "p"] +[27.318277, "o", "2"] +[27.598228, "o", "\r\n"] +[27.640182, "o", "Breakpoint 1 at 0xc1139210: file fs/file.c, line 912.\r\n"] +[27.64023, "o", "(gdb) "] +[28.770631, "o", "c"] +[29.000408, "o", "\r\nContinuing.\r\n"] +[29.585196, "o", "^Z"] +[29.585536, "o", "\r\n[1]+ 
Stopped make gdb\r\n"] +[29.586221, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[30.715625, "o", "#"] +[30.828185, "o", " "] +[30.978622, "o", "c"] +[31.038514, "o", "o"] +[31.18384, "o", "n"] +[31.28793, "o", "n"] +[31.34898, "o", "e"] +[31.392867, "o", "c"] +[31.690793, "o", " "] +[32.237691, "o", "\b\u001b[K"] +[32.355048, "o", "t"] +[32.442939, "o", " "] +[32.588287, "o", "t"] +[32.918417, "o", "o"] +[33.039158, "o", " "] +[33.167914, "o", "t"] +[33.2546, "o", "h"] +[33.340674, "o", "e"] +[33.395216, "o", " "] +[33.6407, "o", "V"] +[33.72697, "o", "M"] +[33.917668, "o", "\r\n"] +[33.918502, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[37.546829, "o", "m"] +[37.637743, "o", "i"] +[37.761726, "o", "n"] +[37.837263, "o", "i"] +[37.938906, "o", "c"] +[38.022622, "o", "o"] +[38.113482, "o", "m"] +[38.172694, "o", " "] +[38.308186, "o", "-"] +[38.54722, "o", "D"] +[38.6566, "o", " "] +[39.13904, "o", "s"] +[39.277557, "o", "e"] +[39.337429, "o", "r"] +[39.459585, "o", "ial.pts "] +[39.776685, "o", "\r\n"] +[39.780118, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[0m\u001b(B"] +[39.780371, "o", "\u001b[?1h\u001b=\u001b[H\u001b[2J"] +[39.781975, "o", "\u001b[?12l\u001b[?25h"] +[39.782204, "o", "\nWelcome to minicom 2.7\r\n\nOPTIONS: I18n \r\n"] +[39.782381, "o", "Compiled on Feb 7 2016, 13:37:27.\r\nPort serial.pts, 23:03:56\r\n\nPress CTRL-A Z for help on special keys\r\n\n"] +[40.619769, "o", "\n"] +[40.622796, "o", "root@qemux86:~# "] +[41.981219, "o", "#"] +[42.161306, "o", " "] +[42.413334, "o", "t"] +[42.58837, "o", "r"] +[42.674525, "o", "i"] +[43.334665, "o", "g"] +[43.464242, "o", "g"] +[43.537786, "o", "e"] +[43.650717, "o", "r"] +[43.838322, "o", " "] +[44.718033, "o", "d"] +[44.842141, "o", "u"] +[44.914998, "o", "p"] +[45.233999, "o", "2"] +[45.931475, "o", " "] 
+[46.078743, "o", "s"] +[46.175471, "o", "y"] +[46.248864, "o", "s"] +[46.892927, "o", "t"] +[46.987556, "o", "e"] +[47.185408, "o", "m"] +[47.28593, "o", " "] +[47.444084, "o", "c"] +[47.49419, "o", "a"] +[47.548442, "o", "l"] +[47.661648, "o", "l"] +[47.793085, "o", "\r\n"] +[47.79407, "o", "root@qemux86:~# "] +[48.389908, "o", "e"] +[48.469687, "o", "c"] +[48.566341, "o", "h"] +[48.637507, "o", "o"] +[48.761749, "o", " "] +[49.620761, "o", "a"] +[49.796805, "o", " "] +[50.159016, "o", ">"] +[50.287746, "o", " "] +[50.407382, "o", "/"] +[50.591362, "o", "t"] +[50.702265, "o", "m"] +[50.775619, "o", "p"] +[51.05656, "o", "/"] +[51.245617, "o", "x"] +[51.460523, "o", "\r\n"] +[52.650063, "o", "\u001b[0m\u001b(B\u001b[7m\u001b[24;1H\u001b[K\u001b[?12l\u001b[?25h"] +[52.650349, "o", "\u001b[?25lCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7 | VT102 | Offline | rial.pts\u001b[?12l\u001b[?25h\u001b[13;1H"] +[52.886767, "o", "\u001b[24;1H\u001b[0m\u001b(B\u001b[?12l\u001b[?25h\u001b[H\u001b[2J\u001b[?1l\u001b>"] +[52.88713, "o", "Suspended. 
Type \"fg\" to resume.\r\n\r\n[2]+ Stopped minicom -D serial.pts\r\n"] +[52.887817, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[53.689978, "o", "f"] +[53.888551, "o", "g"] +[53.990684, "o", " "] +[54.462791, "o", "1"] +[54.54925, "o", "\r\n"] +[54.549574, "o", "make gdb\r\n"] +[54.550655, "o", "\r\n"] +[54.558163, "o", "Breakpoint 1, SyS_dup2 (oldfd=3, newfd=1) at fs/file.c:912\r\n"] +[54.558198, "o", "912\tSYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)\r\n"] +[54.558292, "o", "(gdb) "] +[68.972448, "o", "#"] +[69.215133, "o", " "] +[70.145897, "o", "l"] +[70.226727, "o", "e"] +[70.397696, "o", "t"] +[70.589752, "o", "s"] +[70.722316, "o", " "] +[71.095534, "o", "e"] +[71.198966, "o", "x"] +[71.299745, "o", "a"] +[71.960882, "o", "m"] +[72.431415, "o", "\b\u001b[K"] +[72.541534, "o", "\b\u001b[K"] +[72.657555, "o", "\b\u001b[K"] +[72.763229, "o", "\b\u001b[K"] +[72.955358, "o", "b"] +[72.995208, "o", "a"] +[73.09843, "o", "c"] +[73.230889, "o", "k"] +[73.48259, "o", "t"] +[73.625035, "o", "r"] +[73.716815, "o", "a"] +[73.931987, "o", "c"] +[74.00581, "o", "e"] +[74.151293, "o", " "] +[74.244418, "o", "t"] +[74.36868, "o", "h"] +[74.435091, "o", "e"] +[74.852494, "o", " "] +[74.998958, "o", "s"] +[75.146618, "o", "y"] +[75.18531, "o", "s"] +[75.40006, "o", "t"] +[75.507571, "o", "e"] +[75.878345, "o", "m"] +[76.021645, "o", " "] +[76.139759, "o", "c"] +[76.199716, "o", "a"] +[76.298113, "o", "l"] +[76.406879, "o", "l"] +[76.499901, "o", " "] +[76.624196, "o", "f"] +[76.714473, "o", "l"] +[76.863719, "o", "o"] +[76.936706, "o", "w"] +[77.285863, "o", "\r\n"] +[77.286167, "o", "(gdb) "] +[77.609834, "o", "b"] +[77.67291, "o", "t"] +[77.908684, "o", "\r\n"] +[77.909971, "o", "#0 SyS_dup2 (oldfd=3, newfd=1) at fs/file.c:912\r\n"] +[77.910076, "o", "#1 0xc1001361 in do_syscall_32_irqs_on (regs=)\r\n at arch/x86/entry/common.c:327\r\n"] +[77.912731, "o", "#2 
do_int80_syscall_32 (regs=0xc7235fb4) at arch/x86/entry/common.c:341\r\n#3 0xc14645d3 in entry_INT80_32 () at arch/x86/entry/entry_32.S:544\r\n"] +[77.91529, "o", "#4 0x00000003 in ?? ()\r\n"] +[77.917663, "o", "#5 0x00000003 in ?? ()\r\n"] +[77.92062, "o", "#6 0x0a09e224 in ?? ()\r\n"] +[77.920927, "o", "Backtrace stopped: previous frame inner to this frame (corrupt stack?)\r\n(gdb) "] +[82.733885, "o", "f"] +[82.805597, "o", "r"] +[82.943042, "o", " "] +[83.158856, "o", "1"] +[83.338543, "o", "\r\n"] +[83.338863, "o", "#1 0xc1001361 in do_syscall_32_irqs_on (regs=)\r\n at arch/x86/entry/common.c:327\r\n"] +[83.339111, "o", "327\t\t\tregs->ax = ia32_sys_call_table[nr](\r\n(gdb) "] +[84.302882, "o", "l"] +[84.466961, "o", "i"] +[84.573508, "o", "s"] +[84.805097, "o", "t"] +[86.197885, "o", " "] +[87.456892, "o", "\r\n"] +[87.457044, "o", "322\t\t\t * It's possible that a 32-bit syscall implementation\r\n323\t\t\t * takes a 64-bit parameter but nonetheless assumes that\r\n324\t\t\t * the high bits are zero. 
Make sure we zero-extend all\r\n325\t\t\t * of the args.\r\n326\t\t\t */\r\n327\t\t\tregs->ax = ia32_sys_call_table[nr]("] +[87.457116, "o", "\r\n328\t\t\t\t(unsigned int)regs->bx, (unsigned int)regs->cx,\r\n329\t\t\t\t(unsigned int)regs->dx, (unsigned int)regs->si,\r\n330\t\t\t\t(unsigned int)regs->di, (unsigned int)regs->bp);\r\n331\t\t}\r\n"] +[87.457622, "o", "(gdb) "] +[90.858059, "o", "#"] +[90.97271, "o", " "] +[91.144448, "o", "t"] +[91.217524, "o", "h"] +[91.269411, "o", "i"] +[91.456193, "o", "s"] +[91.63016, "o", " "] +[91.782525, "o", "l"] +[91.948946, "o", "o"] +[92.056737, "o", "o"] +[92.152544, "o", "k"] +[92.246961, "o", "s"] +[92.353698, "o", " "] +[92.444179, "o", "l"] +[92.631606, "o", "i"] +[92.770763, "o", "k"] +[92.861851, "o", "e"] +[92.969579, "o", " "] +[93.109459, "o", "t"] +[93.210131, "o", "h"] +[93.324771, "o", "e"] +[93.466555, "o", " "] +[93.619111, "o", "s"] +[93.715197, "o", "y"] +[93.795165, "o", "s"] +[93.949435, "o", "t"] +[94.027965, "o", "e"] +[94.184473, "o", " "] +[94.3008, "o", "c"] +[94.342823, "o", "a"] +[94.445488, "o", "l"] +[94.568218, "o", "l"] +[94.656948, "o", " "] +[94.839902, "o", "d"] +[94.957893, "o", "i"] +[95.075175, "o", "s"] +[95.230277, "o", "p"] +[95.339237, "o", "a"] +[95.543096, "o", "t"] +[95.751003, "o", "c"] +[95.854341, "o", "h"] +[95.943425, "o", "e"] +[96.023733, "o", "r"] +[96.172254, "o", "\r\n"] +[96.172374, "o", "(gdb) "] +[101.058686, "o", "#"] +[101.204849, "o", " "] +[101.545024, "o", "n"] +[101.645855, "o", "r"] +[101.798058, "o", " "] +[101.950607, "o", "i"] +[102.056996, "o", "s"] +[102.134187, "o", " "] +[102.342601, "o", "t"] +[102.416898, "o", "h"] +[102.533298, "o", "e"] +[102.593287, "o", " "] +[102.719888, "o", "s"] +[102.900423, "o", "y"] +[102.94586, "o", "s"] +[103.162384, "o", "t"] +[103.241973, "o", "e"] +[103.574398, "o", "m"] +[103.675272, "o", " "] +[103.802496, "o", "c"] +[103.849596, "o", "a"] +[103.955005, "o", "l"] +[104.090651, "o", "l"] +[104.1248, "o", " "] 
+[104.318767, "o", "n"] +[104.403953, "o", "u"] +[104.607057, "o", "m"] +[104.809716, "o", "b"] +[104.882333, "o", "e"] +[104.945321, "o", "r"] +[105.169779, "o", "\r\n"] +[105.170195, "o", "(gdb) "] +[120.867099, "o", "l"] +[121.049682, "o", "i"] +[121.155454, "o", "s"] +[121.359302, "o", "t"] +[121.479029, "o", " "] +[121.619056, "o", "3"] +[121.715251, "o", "0"] +[121.836342, "o", "0"] +[122.442593, "o", "\r\n"] +[122.447014, "o", "295\t/*\r\n296\t * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does\r\n297\t * all entry and exit work and returns with IRQs off. This function is\r\n298\t * extremely hot in workloads that use it, and it's usually called from\r\n299\t * do_fast_syscall_32, so forcibly inline it to improve performance.\r\n300\t */\r\n"] +[122.447378, "o", "301\tstatic __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)\r\n302\t{\r\n303\t\tstruct thread_info *ti = current_thread_info();\r\n304\t\tunsigned int nr = (unsigned int)regs->orig_ax;\r\n(gdb) "] +[124.092652, "o", "#"] +[124.554034, "o", " "] +[124.797533, "o", "i"] +[124.905822, "o", "t"] +[125.030417, "o", " "] +[125.160131, "o", "i"] +[125.259278, "o", "s"] +[125.37996, "o", " "] +[125.517546, "o", "p"] +[125.669394, "o", "i"] +[125.760003, "o", "c"] +[125.875741, "o", "k"] +[125.996377, "o", "e"] +[126.076136, "o", "d"] +[126.247928, "o", " "] +[126.466909, "o", "u"] +[126.53385, "o", "p"] +[126.664159, "o", " "] +[127.47466, "o", "f"] +[127.507676, "o", "r"] +[127.594243, "o", "o"] +[127.686369, "o", "m"] +[127.793519, "o", " "] +[127.867138, "o", "t"] +[127.990397, "o", "h"] +[128.119207, "o", " "] +[128.507492, "o", "\b\u001b[K"] +[128.590147, "o", "e"] +[128.692867, "o", " "] +[128.889331, "o", "s"] +[129.214909, "o", "a"] +[129.981084, "o", "m"] +[130.065029, "o", "e"] +[130.186849, "o", " "] +[130.540104, "o", "s"] +[130.732803, "o", "t"] +[130.793955, "o", "r"] +[131.422146, "o", "c"] +[131.696522, "o", "t"] +[131.853467, "o", "u"] +[132.034292, "o", 
"r"] +[132.08244, "o", "e"] +[132.324049, "o", "\b\u001b[K"] +[132.443772, "o", "\b\u001b[K"] +[132.573348, "o", "\b\u001b[K"] +[132.694863, "o", "\b\u001b[K"] +[132.81051, "o", "\b\u001b[K"] +[132.997241, "o", "u"] +[133.088819, "o", "c"] +[133.337918, "o", "t"] +[133.454541, "o", "u"] +[133.569818, "o", "r"] +[133.609841, "o", "e"] +[133.711729, "o", " "] +[134.021296, "o", "("] +[134.244295, "o", "p"] +[134.364747, "o", "t"] +[134.518463, "o", "_"] +[134.694346, "o", "r"] +[134.756482, "o", "e"] +[135.004436, "o", "g"] +[135.409283, "o", "s"] +[135.67484, "o", ")"] +[136.719996, "o", "\r\n"] +[136.720311, "o", "(gdb) "] +[159.587179, "o", "#"] +[159.767425, "o", " "] +[159.943861, "o", "l"] +[160.036363, "o", "e"] +[160.198006, "o", "t"] +[160.366941, "o", "s"] +[160.481441, "o", " "] +[160.690788, "o", "i"] +[161.019933, "o", "n"] +[161.265835, "o", "p"] +[161.435021, "o", "s"] +[161.821488, "o", "\b\u001b[K"] +[161.951302, "o", "\b\u001b[K"] +[162.226603, "o", "s"] +[162.36373, "o", "p"] +[162.498729, "o", "e"] +[162.574127, "o", "c"] +[162.821589, "o", "t"] +[162.945206, "o", " "] +[163.051945, "o", "t"] +[163.219025, "o", "h"] +[163.276168, "o", "e"] +[163.387915, "o", " "] +[163.856566, "o", "r"] +[163.947525, "o", "e"] +[164.54179, "o", "g"] +[164.735443, "o", "s"] +[164.860377, "o", " "] +[164.983243, "o", "c"] +[165.068139, "o", "o"] +[165.142719, "o", "n"] +[165.325774, "o", "t"] +[165.347382, "o", "e"] +[165.53939, "o", "n"] +[165.632312, "o", "t"] +[165.845555, "o", "s"] +[166.537455, "o", "\r\n"] +[166.537632, "o", "(gdb) "] +[166.819034, "o", "p"] +[166.958081, "o", "r"] +[167.052344, "o", "i"] +[167.133431, "o", "n"] +[167.213977, "o", "t"] +[167.322155, "o", " "] +[167.662143, "o", "*"] +[167.894034, "o", "r"] +[167.957716, "o", "e"] +[168.123891, "o", "g"] +[168.275607, "o", "s"] +[168.368704, "o", "\r\n"] +[168.369137, "o", "value has been optimized out\r\n(gdb) "] +[169.223841, "o", "#"] +[169.68196, "o", " "] +[170.473422, "o", "o"] 
+[170.562498, "o", "p"] +[170.754795, "o", "t"] +[170.780246, "o", "i"] +[170.985415, "o", "m"] +[171.06, "o", "i"] +[171.118702, "o", "z"] +[171.326175, "o", "e"] +[171.422983, "o", "d"] +[171.566281, "o", " "] +[171.82724, "o", "b"] +[171.902983, "o", "y"] +[172.056031, "o", " "] +[172.291054, "o", "c"] +[172.382563, "o", "i"] +[172.439382, "o", "m"] +[172.571587, "o", "p"] +[172.870082, "o", "\b\u001b[K"] +[172.987523, "o", "\b\u001b[K"] +[173.101322, "o", "\b\u001b[K"] +[173.254872, "o", "o"] +[173.322597, "o", "m"] +[173.44459, "o", "p"] +[173.603123, "o", "i"] +[173.779322, "o", "l"] +[173.905269, "o", "e"] +[174.000955, "o", "r"] +[174.086025, "o", "."] +[174.209714, "o", "."] +[174.349851, "o", "."] +[174.538669, "o", " "] +[179.50067, "o", "g"] +[179.580322, "o", "o"] +[179.763011, "o", " "] +[179.903828, "o", "a"] +[180.013494, "o", " "] +[180.236946, "o", "f"] +[180.467494, "o", "r"] +[180.568763, "o", "a"] +[180.697886, "o", "m"] +[180.82554, "o", "e"] +[180.907141, "o", " "] +[181.169606, "o", "d"] +[181.262241, "o", "e"] +[181.42098, "o", "e"] +[181.616856, "o", "p"] +[181.793458, "o", "e"] +[181.910212, "o", "r"] +[182.544419, "o", "\r\n"] +[182.54482, "o", "(gdb) "] +[183.073511, "o", "f"] +[183.168588, "o", "r"] +[183.669953, "o", " "] +[191.37133, "o", "2"] +[191.540642, "o", "\r\n"] +[191.541499, "o", "#2 do_int80_syscall_32 (regs=0xc7235fb4) at arch/x86/entry/common.c:341\r\n"] +[191.541856, "o", "341\t\tdo_syscall_32_irqs_on(regs);\r\n(gdb) "] +[192.917131, "o", "p"] +[193.062137, "o", "r"] +[193.147118, "o", "i"] +[193.231132, "o", "n"] +[193.304903, "o", "t"] +[193.385087, "o", " "] +[193.915946, "o", "*"] +[194.135248, "o", "r"] +[194.23597, "o", "e"] +[194.394568, "o", "g"] +[194.566658, "o", "s"] +[194.653019, "o", "\r\n"] +[194.653763, "o", "$1 = {bx = 3, cx = 1, "] +[194.654076, "o", "dx = 3, si = 168423920, di = 168419876, bp = 168419336, \r\n ax = 4294967258, ds = 123, __dsh = 0, es = 123, __esh = 0, fs = 0, \r\n __fsh = 0, "] 
+[194.654371, "o", "gs = 0, __gsh = 0, orig_ax = 63, ip = 1150252833, cs = 115, \r\n __csh = 0, flags = 514, "] +[194.65463, "o", "sp = 3218117628, ss = 123, __ssh = 0}\r\n(gdb) "] +[198.397833, "o", "#"] +[198.589958, "o", " "] +[199.416287, "o", "\b\u001b[K"] +[199.54844, "o", "\b\u001b[K"] +[199.789439, "o", "p"] +[199.910455, "o", "r"] +[200.013253, "o", "i"] +[200.081967, "o", "n"] +[200.172051, "o", "t"] +[200.23888, "o", " "] +[200.344988, "o", "r"] +[200.443063, "o", "e"] +[200.636653, "o", "g"] +[200.903926, "o", "s"] +[201.297424, "o", "\b\u001b[K"] +[201.555797, "o", "s"] +[201.70675, "o", "\r\n"] +[201.718736, "o", "$2 = (struct pt_regs *) 0xc7235fb4\r\n(gdb) "] +[202.762192, "o", "#"] +[202.96906, "o", " "] +[203.200172, "o", "t"] +[203.335914, "o", "h"] +[203.397287, "o", "i"] +[203.509368, "o", "s"] +[203.663251, "o", " "] +[204.186121, "o", "i"] +[204.277979, "o", "s"] +[204.37577, "o", " "] +[204.575364, "o", "a"] +[204.681527, "o", " "] +[205.432843, "o", "s"] +[205.577435, "o", "a"] +[205.84148, "o", "v"] +[205.901931, "o", "e"] +[206.142056, "o", " "] +[206.48041, "o", "\b\u001b[K"] +[206.576791, "o", "d"] +[206.650202, "o", " "] +[206.846523, "o", "o"] +[207.147206, "o", " "] +[207.497795, "o", "\b\u001b[K"] +[207.682557, "o", "n"] +[207.773982, "o", " "] +[207.873402, "o", "s"] +[208.081623, "o", "t"] +[208.158662, "o", "a"] +[208.370015, "o", "c"] +[208.494404, "o", "k"] +[210.422713, "o", " "] +[212.637105, "o", "s"] +[212.787177, "o", "t"] +[212.851629, "o", "r"] +[212.997138, "o", "u"] +[213.10568, "o", "c"] +[213.316927, "o", "t"] +[213.4249, "o", "u"] +[213.518001, "o", "r"] +[213.568409, "o", "e"] +[213.68215, "o", " "] +[214.147557, "o", "w"] +[214.236414, "o", "h"] +[214.292857, "o", "i"] +[214.384715, "o", "c"] +[214.448337, "o", "h"] +[214.52596, "o", " "] +[214.62955, "o", "s"] +[214.810601, "o", "t"] +[214.922436, "o", "o"] +[215.0038, "o", "r"] +[215.075964, "o", "e"] +[215.264322, "o", "s"] +[215.370649, "o", " "] +[215.800702, 
"o", "u"] +[215.868643, "o", "s"] +[215.977376, "o", "e"] +[216.044684, "o", "r"] +[216.169163, "o", "s"] +[216.284503, "o", "p"] +[216.34314, "o", "a"] +[216.450213, "o", "c"] +[216.550799, "o", "e"] +[216.722203, "o", " "] +[219.131447, "o", "r"] +[219.195644, "o", "e"] +[219.377234, "o", "g"] +[219.486582, "o", "i"] +[219.520092, "o", "s"] +[219.685879, "o", "t"] +[219.754728, "o", "e"] +[219.874287, "o", "r"] +[220.02845, "o", "s"] +[221.002656, "o", " "] +[221.144104, "o", "v"] +[221.201385, "o", "a"] +[221.300797, "o", "l"] +[221.517941, "o", "u \r"] +[221.655037, "o", "e"] +[221.819353, "o", "s"] +[222.291958, "o", "\r\n"] +[222.292213, "o", "(gdb) "] +[228.409682, "o", "i"] +[228.551388, "o", "n"] +[229.037434, "o", "f"] +[229.139332, "o", "o"] +[229.400686, "o", " "] +[230.375475, "o", "r"] +[230.455225, "o", "e"] +[230.615513, "o", "\u0007"] +[231.367524, "o", "g"] +[231.608532, "o", "s"] +[231.985991, "o", "\b\u001b[K"] +[232.17368, "o", "i"] +[232.244238, "o", "s"] +[232.453164, "o", "t"] +[232.55645, "o", "e"] +[232.688874, "o", "r"] +[232.95895, "o", " "] +[233.83154, "o", "e"] +[233.978035, "o", "s"] +[234.047244, "o", "p"] +[234.168937, "o", "\r\n"] +[234.169348, "o", "esp 0xc7235f8c"] +[234.169649, "o", "\t0xc7235f8c\r\n"] +[234.16976, "o", "(gdb) "] +[250.775201, "o", "#"] +[251.106842, "o", " "] +[251.346477, "o", "h"] +[251.418165, "o", "o"] +[251.501715, "o", "w"] +[251.664792, "o", " "] +[252.067964, "o", "d"] +[252.208144, "o", "i"] +[252.87959, "o", "d"] +[253.06756, "o", " "] +[253.393168, "o", "t"] +[253.533434, "o", "h"] +[253.601184, "o", "o"] +[253.76566, "o", "s"] +[253.959997, "o", "e"] +[254.157286, "o", " "] +[254.898526, "o", "u"] +[254.999925, "o", "s"] +[255.096884, "o", "e"] +[255.171001, "o", "r"] +[255.315978, "o", "s"] +[255.418223, "o", "p"] +[255.516375, "o", "a"] +[255.651774, "o", "c"] +[255.762584, "o", "e"] +[255.92294, "o", " "] +[256.104238, "o", "r"] +[256.165681, "o", "e"] +[256.623148, "o", "g"] +[256.761757, "o", 
"i"] +[256.823147, "o", "s"] +[257.031872, "o", "t"] +[257.103524, "o", "e"] +[257.561344, "o", "r"] +[258.279258, "o", " "] +[258.447006, "o", "v"] +[258.502906, "o", "a"] +[258.608216, "o", "l"] +[258.786735, "o", "u"] +[258.86697, "o", "e"] +[258.972944, "o", "s"] +[259.099576, "o", " "] +[259.319621, "o", "g"] +[259.43755, "o", "o"] +[259.566479, "o", "t"] +[259.670838, "o", " "] +[259.795132, "o", "s"] +[259.867977, "o", "a"] +[260.067483, "o", "v"] +[260.107313, "o", "e"] +[260.330451, "o", "d"] +[260.681612, "o", " "] +[262.446506, "o", "o"] +[262.637035, "o", "n"] +[262.71973, "o", " "] +[262.816535, "o", "s"] +[262.986307, "o", "t"] +[263.060635, "o", "a"] +[263.228155, "o", "c"] +[263.277514, "o", "k"] +[263.566323, "o", "?"] +[263.893074, "o", "\r\n"] +[263.893184, "o", "(gdb) "] +[264.675148, "o", "#"] +[265.174988, "o", " "] +[265.776336, "o", "g"] +[265.893763, "o", "o"] +[266.031976, "o", " "] +[266.147399, "o", "a"] +[266.278558, "o", " "] +[266.768307, "o", "f"] +[266.949187, "o", "r"] +[267.030231, "o", "a"] +[267.15945, "o", "m"] +[267.376536, "o", " "] +[267.847105, "o", "\b\u001b[K"] +[267.91339, "o", "e"] +[268.015642, "o", " "] +[268.317662, "o", "d"] +[268.428077, "o", "e"] +[268.589581, "o", "e"] +[268.699983, "o", "p"] +[268.795125, "o", "e"] +[268.886986, "o", "r"] +[268.983051, "o", "."] +[269.133065, "o", "."] +[269.275845, "o", "."] +[269.70994, "o", "\r\n"] +[269.71023, "o", "(gdb) "] +[269.989964, "o", "f"] +[270.057684, "o", "r"] +[270.124032, "o", "e"] +[270.338379, "o", " "] +[272.954739, "o", "3"] +[273.342331, "o", "\r\n"] +[273.342463, "o", "Undefined command: \"fre\". 
Try \"help\".\r\n(gdb) "] +[274.537938, "o", "f"] +[274.591956, "o", "r"] +[274.794531, "o", " "] +[275.013426, "o", "3"] +[275.213151, "o", "\r\n"] +[275.213508, "o", "#3 0xc14645d3 in entry_INT80_32 () at arch/x86/entry/entry_32.S:544\r\n"] +[275.213876, "o", "544\t\tcall\tdo_int80_syscall_32\r\n(gdb) "] +[280.62674, "o", "l"] +[280.794815, "o", "i"] +[280.926434, "o", "s"] +[281.156915, "o", "t"] +[281.246634, "o", " "] +[281.604719, "o", "5"] +[281.88764, "o", "3"] +[281.986378, "o", "2"] +[282.314826, "o", "\r\n"] +[282.319103, "o", "527\t * edx arg3\r\n528\t * esi arg4\r\n529\t * edi arg5\r\n530\t * ebp arg6\r\n531\t */\r\n532\tENTRY(entry_INT80_32)\r\n533\t\tASM_CLAC\r\n534\t\tpushl\t%eax\t\t\t/* pt_regs->orig_ax */\r\n535\t\tSAVE_ALL pt_regs_ax=$-ENOSYS\t/* save rest */\r\n536\t\r\n"] +[282.319396, "o", "(gdb) "] +[287.032723, "o", "$"] +[287.628779, "o", "\b\u001b[K"] +[287.896648, "o", "#"] +[288.026461, "o", " "] +[288.213827, "o", "l"] +[288.286875, "o", "e"] +[288.488417, "o", "t"] +[288.672395, "o", "s"] +[289.171884, "o", " "] +[289.288632, "o", "s"] +[289.468608, "o", "e"] +[289.602164, "o", "e"] +[289.762666, "o", " "] +[289.952819, "o", "w"] +[290.078185, "o", "h"] +[290.119588, "o", "a"] +[290.337222, "o", "t"] +[290.411464, "o", " "] +[291.504878, "o", "S"] +[291.584952, "o", "A"] +[291.754893, "o", "V"] +[291.802071, "o", "E"] +[291.96154, "o", "_"] +[292.14258, "o", "A"] +[292.21097, "o", "L"] +[292.344666, "o", "L"] +[292.56581, "o", " "] +[292.756512, "o", "d"] +[292.852271, "o", "o"] +[292.933196, "o", "e"] +[293.021413, "o", "s"] +[294.080447, "o", "\r\n"] +[294.080887, "o", "(gdb) "] +[295.025805, "o", "d"] +[295.136476, "o", "i"] +[295.271217, "o", "s"] +[295.37067, "o", "a"] +[295.527332, "o", "s"] +[295.898488, "o", "s"] +[296.085096, "o", "emble "] +[297.455021, "o", "\r\n"] +[297.455375, "o", "Dump of assembler code for function entry_INT80_32:\r\n 0xc14645a4 <+0>:\tlea 0x0(%esi),%esi\r\n 0xc14645a7 <+3>:\tpush %eax\r\n 0xc14645a8 
<+4>:\tcld \r\n 0xc14645a9 <+5>:\tpush $0x0\r\n 0xc14645ab <+7>:\tpush %fs\r\n 0xc14645ad <+9>:\tpush %es\r\n 0xc14645ae <+10>:\tpush %ds\r\n 0xc14645af <+11>:\tpush $0xffffffda\r\n 0xc14645b1 <+13>:\tpush %ebp\r\n"] +[297.455523, "o", " 0xc14645b2 <+14>:\tpush %edi\r\n 0xc14645b3 <+15>:\tpush %esi\r\n 0xc14645b4 <+16>:\tpush %edx\r\n 0xc14645b5 <+17>:\tpush %ecx\r\n 0xc14645b6 <+18>:\tpush %ebx\r\n"] +[297.455959, "o", " 0xc14645b7 <+19>:\tmov $0x7b,%edx\r\n 0xc14645bc <+24>:\tmov %edx,%ds\r\n 0xc14645be <+26>:\tmov %edx,%es\r\n"] +[297.456273, "o", " 0xc14645c0 <+28>:\tmov $0xd8,%edx\r\n 0xc14645c5 <+33>:\tmov %edx,%fs\r\n"] +[297.456519, "o", " 0xc14645c7 <+35>:\tcall 0xc1000ed3 \r\n 0xc14645cc <+40>:\tmov %esp,%eax\r\n 0xc14645ce <+42>:\tcall 0xc1001300 \r\n"] +[297.456787, "o", "---Type to continue, or q to quit---"] +[300.037654, "o", "q"] +[300.474906, "o", "\r\nQuit\r\n"] +[300.475028, "o", "(gdb) "] +[301.222036, "o", "#"] +[301.344949, "o", " "] +[301.53983, "o", "a"] +[301.639062, "o", "s"] +[301.804905, "o", " "] +[301.932574, "o", "e"] +[302.019082, "o", "x"] +[302.205597, "o", "p"] +[302.282764, "o", "e"] +[302.353223, "o", "c"] +[302.577703, "o", "t"] +[302.659955, "o", "e"] +[302.829647, "o", "d"] +[302.916048, "o", ","] +[302.988029, "o", " "] +[303.195687, "o", "i"] +[303.309352, "o", "t"] +[303.425016, "o", " "] +[304.041744, "o", "p"] +[304.269832, "o", "u"] +[304.428141, "o", "s"] +[304.641784, "o", "h"] +[304.756462, "o", "e"] +[304.840521, "o", "s"] +[305.137798, "o", " "] +[306.996152, "o", "r"] +[307.053783, "o", "e"] +[307.293212, "o", "s"] +[307.493748, "o", "i"] +[308.526057, "o", "\b\u001b[K"] +[308.651061, "o", "\b\u001b[K"] +[308.813776, "o", "\b\u001b[K"] +[309.52065, "o", "u"] +[309.734069, "o", "\b\u001b[K"] +[309.866326, "o", "\b\u001b[K"] +[310.014799, "o", "u"] +[310.293887, "o", "e"] +[310.409052, "o", "r"] +[310.764658, "o", "\b\u001b[K"] +[310.889594, "o", "\b\u001b[K"] +[311.047137, "o", "s"] +[311.173052, "o", "e"] 
+[311.228372, "o", "r"] +[311.711761, "o", "s"] +[311.809911, "o", "p"] +[311.873173, "o", "a"] +[311.988731, "o", "c"] +[312.077428, "o", "e"] +[312.161123, "o", " "] +[312.281191, "o", "r"] +[312.349935, "o", "e"] +[312.547146, "o", "g"] +[312.667304, "o", "s"] +[312.936388, "o", " "] +[313.086964, "o", "t"] +[313.154591, "o", "o"] +[313.232086, "o", " "] +[314.351931, "o", "s"] +[314.549992, "o", "t"] +[314.60986, "o", "a"] +[314.792887, "o", "c"] +[314.864733, "o", "k"] +[314.991827, "o", "\r\n"] +[314.992256, "o", "(gdb) "] +[327.831401, "o", "#"] +[328.116647, "o", " "] +[328.304643, "o", "l"] +[328.403419, "o", "e"] +[328.643693, "o", "t"] +[328.881425, "o", "s"] +[329.012271, "o", " "] +[329.350209, "o", "o"] +[329.675607, "o", "\b\u001b[K"] +[329.746522, "o", "g"] +[329.894035, "o", "o"] +[329.960528, "o", " "] +[330.265508, "o", "d"] +[330.454674, "o", "e"] +[330.61252, "o", "e"] +[330.767734, "o", "p"] +[331.017861, "o", "e"] +[331.105343, "o", "r"] +[331.193089, "o", ","] +[331.291487, "o", " "] +[331.421255, "o", "t"] +[331.499907, "o", "o"] +[331.589935, "o", " "] +[331.800311, "o", "u"] +[332.05827, "o", "s"] +[332.210451, "o", "e"] +[332.270701, "o", "r"] +[332.420355, "o", "s"] +[332.533886, "o", "p"] +[332.598023, "o", "a"] +[332.679367, "o", "c"] +[332.768566, "o", "e"] +[332.92981, "o", "\r\n"] +[332.929919, "o", "(gdb) "] +[333.474291, "o", "f"] +[333.512483, "o", "r"] +[333.670863, "o", " "] +[334.214986, "o", "2"] +[334.409422, "o", "\r\n"] +[334.410165, "o", "#2 do_int80_syscall_32 (regs=0xc7235fb4) at arch/x86/entry/common.c:341\r\n"] +[334.410451, "o", "341\t\tdo_syscall_32_irqs_on(regs);\r\n(gdb) "] +[334.795034, "o", "p"] +[334.935707, "o", "r"] +[335.041764, "o", "i"] +[335.125845, "o", "n"] +[335.256727, "o", "t"] +[335.436358, "o", " "] +[336.398283, "o", "*"] +[336.683485, "o", "r"] +[336.751145, "o", "e"] +[336.914056, "o", "g"] +[337.129438, "o", "s"] +[337.326378, "o", "\r\n"] +[337.327037, "o", "$3 = {bx = 3, cx = 1, dx = 3, "] 
+[337.327271, "o", "si = 168423920, di = 168419876, bp = 168419336, \r\n ax = 4294967258, ds = 123, __dsh = 0, es = 123, __esh = 0, fs = 0, \r\n"] +[337.327484, "o", " __fsh = 0, gs = 0, __gsh = 0, orig_ax = 63, ip = 1150252833, cs = 115, \r\n __csh = 0, flags = 514, sp = 3218117628, "] +[337.327641, "o", "ss = 123, __ssh = 0}\r\n(gdb) "] +[338.677993, "o", "#"] +[338.93667, "o", " "] +[339.41632, "o", "t"] +[339.584092, "o", "h"] +[340.119744, "o", "e"] +[340.257132, "o", " "] +[340.430186, "o", "p"] +[340.531965, "o", "t"] +[340.720155, "o", "_"] +[340.823874, "o", "r"] +[340.878238, "o", "e"] +[341.092385, "o", "g"] +[341.261419, "o", "s"] +[341.410487, "o", " "] +[341.687741, "o", "s"] +[341.898318, "o", "t"] +[341.974712, "o", "r"] +[342.103508, "o", "u"] +[342.220027, "o", "c"] +[342.426521, "o", "t"] +[342.533294, "o", "u"] +[342.643383, "o", "r"] +[342.706116, "o", "e"] +[342.969115, "o", " "] +[343.424403, "o", "s"] +[343.831852, "o", "a"] +[344.073427, "o", "v"] +[344.124191, "o", "e"] +[344.307738, "o", "s"] +[344.41903, "o", " "] +[344.562734, "o", "t"] +[344.695776, "o", "h"] +[344.763893, "o", "e"] +[344.926178, "o", " "] +[345.788445, "o", "E"] +[346.04075, "o", "S"] +[346.167767, "o", "P"] +[346.367919, "o", " "] +[346.480742, "o", "a"] +[346.612524, "o", "n"] +[346.708959, "o", "d"] +[346.772221, "o", " "] +[347.04116, "o", "E"] +[347.27069, "o", "I"] +[347.41795, "o", "P"] +[347.620023, "o", " "] +[348.267947, "o", "v"] +[348.310577, "o", "a"] +[348.389535, "o", "l"] +[348.583187, "o", "u"] +[348.626281, "o", "e"] +[348.75411, "o", "s"] +[349.152128, "o", " "] +[350.062966, "o", "\b\b\b\b\b\b\b"] +[350.505726, "o", "\u001b[1@r"] +[350.581222, "o", "\u001b[1@e"] +[350.733489, "o", "\u001b[1@g"] +[350.860972, "o", "\u001b[1@s"] +[351.063679, "o", "\u001b[1@ "] +[351.403168, "o", "\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C\u001b[C"] +[351.723396, "o", "a"] +[351.796061, "o", "s"] +[351.924873, "o", " "] +[352.127877, "o", "w"] +[352.258955, 
"o", "e"] +[352.359039, "o", "l"] +[352.480885, "o", "l"] +[352.581941, "o", "\r\n"] +[352.582063, "o", "(gdb) "] +[354.291356, "o", "p"] +[354.434906, "o", "r"] +[354.548164, "o", "i"] +[354.623451, "o", "n"] +[354.677168, "o", "t"] +[354.835361, "o", " "] +[355.104827, "o", "/"] +[355.285227, "o", "x"] +[355.434379, "o", " "] +[355.814288, "o", "r"] +[355.884763, "o", "e"] +[356.044765, "o", "g"] +[356.169825, "o", "-"] +[356.229667, "o", "s"] +[356.715904, "o", ">"] +[356.92396, "o", "\b\u001b[K"] +[357.049325, "o", "\b\u001b[K"] +[357.20555, "o", "\b\u001b[K"] +[357.296469, "o", "s"] +[357.676925, "o", ">"] +[358.100747, "o", "i"] +[358.154056, "o", "p"] +[358.530121, "o", "\b\u001b[K"] +[358.650376, "o", "\b\u001b[K"] +[358.771888, "o", "\b\u001b[K"] +[358.830286, "o", "-"] +[359.096595, "o", ">"] +[359.399708, "o", "i"] +[359.471959, "o", "p"] +[359.66492, "o", "\r\n"] +[359.69065, "o", "$4 = 0x448f7721\r\n(gdb) "] +[362.750139, "o", "d"] +[362.873006, "o", "i"] +[363.70087, "o", "s"] +[364.005826, "o", "a"] +[364.154305, "o", "s"] +[364.534403, "o", "s"] +[364.638318, "o", "emble "] +[367.274254, "o", "0x"] +[367.274386, "o", "448f"] +[367.274769, "o", "7721"] +[368.070465, "o", "-"] +[368.275859, "o", "0"] +[368.34472, "o", "x"] +[368.717017, "o", "1"] +[368.721481, "o", "2"] +[368.831229, "o", ","] +[369.57988, "o", "+"] +[369.896806, "o", "0"] +[370.018088, "o", "x"] +[370.253572, "o", "1"] +[370.325077, "o", "2"] +[370.729392, "o", "\r\n"] +[370.729561, "o", "Dump of assembler code from 0x448f770f to 0x448f7721:\r\n"] +[370.730714, "o", " 0x448f770f:\tnop\r\n"] +[370.73105, "o", " 0x448f7710:\tmov %ebx,%edx\r\n"] +[370.731361, "o", " 0x448f7712:\tmov 0x8(%esp),%ecx\r\n"] +[370.731696, "o", " 0x448f7716:\tmov 0x4(%esp),%ebx\r\n"] +[370.732008, "o", " 0x448f771a:\tmov $0x3f,%eax\r\n"] +[370.732313, "o", " 0x448f771f:\tint $0x80\r\nEnd of assembler dump.\r\n"] +[370.732521, "o", "(gdb) "] +[374.541556, "o", "#"] +[374.783001, "o", " "] +[374.898373, "o", 
"t"] +[375.026925, "o", "h"] +[375.073567, "o", "i"] +[375.175876, "o", "s"] +[375.257213, "o", " "] +[375.437225, "o", "l"] +[375.612851, "o", "o"] +[375.726918, "o", "o"] +[375.811791, "o", "k"] +[375.914079, "o", "s"] +[375.991534, "o", " "] +[376.124911, "o", "l"] +[376.227571, "o", "i"] +[376.402924, "o", "k"] +[376.568604, "o", "e"] +[376.709076, "o", " "] +[376.892681, "o", "t"] +[377.0435, "o", "h"] +[377.153381, "o", "e"] +[377.255434, "o", " "] +[379.598554, "o", "d"] +[379.742728, "o", "u"] +[379.818192, "o", "p"] +[379.946914, "o", "2"] +[380.101209, "o", " "] +[381.094712, "o", "i"] +[381.254821, "o", "m"] +[381.448647, "o", "p"] +[381.516377, "o", "l"] +[381.632838, "o", "e"] +[381.733497, "o", "m"] +[381.821502, "o", "e"] +[381.939177, "o", "n"] +[382.061297, "o", "t"] +[382.126975, "o", "a"] +[382.263958, "o", "t"] +[382.355856, "o", "i"] +[382.39873, "o", "o"] +[382.625141, "o", "n"] +[382.785792, "o", " "] +[382.924201, "o", "i"] +[383.007739, "o", "n"] +[383.112653, "o", " "] +[383.344371, "o", "g"] +[383.528085, "o", "l"] +[383.818807, "o", "\b\u001b[K"] +[383.940934, "o", "\b\u001b[K"] +[384.549965, "o", "l"] +[384.699794, "o", "i"] +[384.893748, "o", "b"] +[384.958022, "o", "c"] +[385.106737, "o", "\r\n"] +[385.106862, "o", "(gdb) "] +[390.638525, "o", "#"] +[390.745393, "o", " "] +[390.899618, "o", "l"] +[390.932654, "o", "e"] +[391.101789, "o", "t"] +[391.253114, "o", "s"] +[391.385379, "o", " "] +[391.507941, "o", "c"] +[391.609023, "o", "h"] +[391.674385, "o", "e"] +[391.751088, "o", "c"] +[391.811, "o", "k"] +[391.917497, "o", " "] +[392.061073, "o", "t"] +[392.14249, "o", "h"] +[392.235203, "o", "e"] +[392.357752, "o", " "] +[392.997036, "o", "s"] +[393.16912, "o", "t"] +[393.264152, "o", "a"] +[393.476405, "o", "c"] +[393.623273, "o", "k"] +[393.735766, "o", " "] +[393.886348, "o", "v"] +[393.956001, "o", "a"] +[394.095124, "o", "l"] +[394.297637, "o", "u"] +[394.373537, "o", "e"] +[394.864079, "o", "s"] +[396.020466, "o", " "] 
+[396.544499, "o", "a"] +[397.19223, "o", "\b\u001b[K"] +[397.516516, "o", "("] +[397.757745, "o", "d"] +[397.840435, "o", "a"] +[398.021342, "o", "r"] +[398.083794, "o", "a"] +[398.395255, "o", "\b\u001b[K"] +[398.527905, "o", "\b\u001b[K"] +[398.655121, "o", "\b\u001b[K"] +[398.791444, "o", "\b\u001b[K"] +[400.259196, "o", "\b\u001b[K"] +[400.381224, "o", "\b\u001b[K"] +[400.705872, "o", "\r\n"] +[400.705994, "o", "(gdb) "] +[405.090136, "o", "p"] +[405.250017, "o", "r"] +[405.339117, "o", "i"] +[405.408649, "o", "n"] +[405.48463, "o", "t"] +[405.598488, "o", " "] +[406.982799, "o", "/"] +[407.074314, "o", "x"] +[407.190834, "o", " "] +[407.406215, "o", "r"] +[407.477829, "o", "e"] +[407.621752, "o", "g"] +[407.761064, "o", "s"] +[407.848706, "o", "-"] +[408.12336, "o", ">"] +[409.034397, "o", "s"] +[409.101141, "o", "p"] +[409.237828, "o", "\r\n"] +[409.250562, "o", "$5 = 0xbfd093fc\r\n(gdb) "] +[410.588712, "o", "x"] +[410.779534, "o", " "] +[410.929273, "o", "/"] +[411.084678, "o", "x"] +[411.282106, "o", " "] +[412.846629, "o", "0xb"] +[412.84701, "o", "fd093fc"] +[413.495368, "o", "\r\n"] +[413.495801, "o", "0xbfd093fc:\t0x08068b46\r\n"] +[413.495927, "o", "(gdb) "] +[413.970708, "o", "\r\n"] +[413.971563, "o", "0xbfd09400:\t0x00000003\r\n"] +[413.971895, "o", "(gdb) "] +[414.429768, "o", "\r\n"] +[414.430693, "o", "0xbfd09404:\t0x00000001\r\n"] +[414.430996, "o", "(gdb) "] +[416.367425, "o", "#"] +[416.600327, "o", " "] +[417.126384, "o", "f"] +[417.206421, "o", "i"] +[417.315777, "o", "r"] +[417.716293, "o", "s"] +[418.11331, "o", "t"] +[418.641803, "o", " "] +[419.136181, "o", "s"] +[419.31382, "o", "e"] +[419.466617, "o", "e"] +[420.57146, "o", "m"] +[421.230979, "o", "\b\u001b[K"] +[421.365032, "o", "\b\u001b[K"] +[421.493488, "o", "\b\u001b[K"] +[421.621835, "o", "\b\u001b[K"] +[421.857413, "o", "i"] +[421.945647, "o", "s"] +[422.057603, "o", " "] +[422.199844, "o", "t"] +[422.257072, "o", "h"] +[422.36931, "o", "e"] +[422.432441, "o", " "] 
+[423.25473, "o", "r"] +[423.329204, "o", "e"] +[423.486095, "o", "t"] +[423.556503, "o", "u"] +[423.658879, "o", "r"] +[423.740909, "o", "n"] +[423.811235, "o", " "] +[423.941111, "o", "a"] +[424.030245, "o", "d"] +[424.169883, "o", "d"] +[424.362463, "o", "r"] +[424.436531, "o", "e"] +[424.621395, "o", "s"] +[424.763605, "o", "s"] +[425.22778, "o", "\r\n"] +[425.228074, "o", "(gdb) "] +[426.180565, "o", "#"] +[426.329978, "o", " "] +[426.674846, "o", "s"] +[427.644279, "o", "e"] +[427.832117, "o", "c"] +[427.945399, "o", "o"] +[428.050368, "o", "n"] +[428.1215, "o", "d"] +[428.252667, "o", " "] +[428.383049, "o", "a"] +[428.500148, "o", "n"] +[428.596744, "o", "d"] +[428.698814, "o", " "] +[428.945383, "o", "r"] +[429.106316, "o", "i"] +[429.645192, "o", "\b\u001b[K"] +[429.767912, "o", "\b\u001b[K"] +[429.845124, "o", "t"] +[430.003863, "o", "h"] +[430.107219, "o", "i"] +[430.255689, "o", "r"] +[430.487167, "o", "d"] +[430.667477, "o", " "] +[431.006023, "o", "a"] +[431.211805, "o", "r"] +[431.292319, "o", "e"] +[431.437362, "o", " "] +[431.56393, "o", "t"] +[431.683208, "o", "h"] +[431.728348, "o", "e"] +[431.865432, "o", " "] +[432.888452, "o", "p"] +[432.977097, "o", "a"] +[433.175822, "o", "r"] +[433.258553, "o", "a"] +[433.531527, "o", "m"] +[434.072679, "o", "e"] +[434.235626, "o", "t"] +[434.331498, "o", "e"] +[434.456163, "o", "r"] +[434.694781, "o", "s"] +[434.879649, "o", " "] +[435.461291, "o", "("] +[435.855804, "o", "f"] +[435.932997, "o", "d"] +[436.744873, "o", "s"] +[436.988719, "o", " "] +[438.082246, "o", "3"] +[438.216148, "o", " "] +[438.350139, "o", "a"] +[438.461137, "o", "n"] +[438.546648, "o", "d"] +[438.645921, "o", " "] +[438.899168, "o", "1"] +[439.219735, "o", ")"] +[439.481293, "o", "\r\n"] +[439.481613, "o", "(gdb) "] +[450.102183, "o", "quit\r\n"] +[450.102822, "o", "A debugging session is active.\r\n\r\n\tInferior 1 [Remote target] will be detached.\r\n\r\nQuit anyway? 
(y or n) "] +[451.119252, "o", "y"] +[451.379225, "o", "\r\nDetaching from program: /home/tavi/src/linux/vmlinux, Remote target\r\n"] +[451.379742, "o", "Ending remote debugging.\r\n"] +[451.390975, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[451.925851, "o", "f"] +[452.122079, "o", "g"] +[452.20423, "o", "\r\n"] +[452.2046, "o", "minicom -D serial.pts\r\n"] +[452.204924, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[?1h\u001b=\u001b[1;1H\u001b[?12l\u001b[?25h\u001b[0m\u001b(B \u001b[2;1HWelcome to minicom 2.7 \u001b[3;1H \u001b[4;1HOPTIONS: I18n \u001b[5;1HCompiled on Feb 7 2016, 13:37:27. \u001b[6;1HPort serial.pts, 23:03:56 \u001b[7;1H \u001b[8;1HPress CTRL-A Z for help on special keys \u001b[9;1H \u001b[10;1H \u001b[11;1Hroot@qemux86:~# # trigger dup2 system call \u001b[12;1Hroot@qemux86:~# echo a > /"] +[452.205008, "o", "tmp/x \u001b[13;1H \u001b[14;1H \u001b[15;1H \u001b[16;1H \u001b[17;1H \u001b[18;1H \u001b[19;1H \u001b[20;1H \u001b[21;1H \u001b[22;1H \u001b[23;1H \u001b[24;1H\u001b[0m\u001b("] +[452.205404, "o", "B\u001b[7mCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7 | VT102 | Offline | rial.pts\u001b[13;1H\u001b[?12l\u001b[?25h\u001b[24;1H\u001b[0m\u001b(B \u001b[13;1Hroot@qemux86:~# "] +[453.066691, "o", "\u001b[0m\u001b(B\u001b[7m\u001b[24;1H\u001b[K\u001b[?12l\u001b[?25h\u001b[?25lCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7 | VT102 | Offline | rial.pts\u001b[?12l\u001b[?25h\u001b[13;17H"] +[453.274675, "o", "\u001b[8;30H\u001b[?25l\u001b[0m\u001b(B+----------------------+\u001b[9;30H| Leave Minicom? 
|\u001b[10;30H| No |\u001b[11;30H+----------------------+\u001b[10;51H\u001b[?25l\u001b[10;33H\u001b[0m\u001b(B\u001b[7m Yes "] +[453.425761, "o", "\u001b[?12l\u001b[?25h\u001b[8;1H\u001b[0m\u001b(BPress CTRL-A Z for help on special keys \u001b[9;1H \u001b[10;1H \u001b[11;1Hroot@qemux86:~# # trigger dup2 system call \u001b[13;17H\u001b[0m\u001b(B\u001b[7m\u001b[?12l\u001b[?25h"] +[453.426136, "o", "\u001b[?12l\u001b[?25h\u001b[0m\u001b(B\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1l\u001b>\u001b[!p\u001b[?3;4l\u001b[4l\u001b>"] +[453.42742, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[453.929041, "o", "#"] +[454.48124, "o", " "] +[454.772523, "o", "t"] +[454.877335, "o", "h"] +[454.967191, "o", "e"] +[455.064771, "o", " "] +[455.221252, "o", "e"] +[455.36275, "o", "n"] +[455.48508, "o", "d"] +[456.337409, "o", "\r\n"] +[456.3382, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[457.092602, "o", "exit\r\n"] diff --git a/Documentation/teaching/res/syscalls-vdso.cast b/Documentation/teaching/res/syscalls-vdso.cast new file mode 100644 index 00000000000000..08d11b8fa39bd8 --- /dev/null +++ b/Documentation/teaching/res/syscalls-vdso.cast @@ -0,0 +1,299 @@ +{"title": "VDSO", "width": 80, "height": 24, "env": {"TERM": "xterm-256color", "SHELL": "/bin/bash"}, "timestamp": 1519704037, "version": 2, "idle_time_limit": 1.0} +[0.025954, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[0.778357, "o", "\r\u001b[12P(reverse-i-search)`':\u001b[C"] +[1.32861, "o", "\b\b\b\u001b[23@m': minicom -D serial.pts\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[1.448449, "o", "\b\b\b\b\b\b\b\b\b\u001b[1@i\u001b[C\u001b[C\u001b[C"] +[1.765225, "o", "\r\u001b]0;tavi@lktp: 
~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ minicom -D serial.pts \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r\n"] +[1.769204, "o", "\u001b[!p\u001b[?3;4l\u001b[4l\u001b>\u001b[0m\u001b(B"] +[1.769319, "o", "\u001b[?1h\u001b=\u001b[H\u001b[2J"] +[1.770766, "o", "\u001b[?12l\u001b[?25h"] +[1.770874, "o", "\nWelcome to minicom 2.7\r\n\nOPTIONS: I18n \r\nCompiled on Feb 7 2016, 13:37:27.\r\nPort serial.pts, 05:00:24\r\n\nPress CTRL-A Z for help on special keys\r\n\n"] +[2.416093, "o", "\n"] +[2.418403, "o", "root@qemux86:~# "] +[3.828624, "o", "\r(reverse-i-search)`': "] +[4.158346, "o", "\b\b\bc': cat /proc/$$/maps | grep vdso\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"] +[4.248967, "o", "\b\b\b\b\b\b\b\b\b\b\b ': cat /proc/$$/maps | grep vdso \u001b[11;21Ha': "] +[4.827489, "o", "\r\u001b[P\u001b[P\u001b[P\u001b[P\u001b[P\u001b[P\u001b[P\u001b[Proot@qemux86:~# "] +[4.82862, "o", "\r\n"] +[4.879844, "o", "b7fe1000-b7fe2000 r-xp 00000000 00:00 0 [vdso]"] +[4.880004, "o", "\r\n"] +[4.881875, "o", "root@qemux86:~# "] +[6.142231, "o", "d"] +[6.284442, "o", "d"] +[6.458279, "o", " "] +[6.621989, "o", "i"] +[6.739944, "o", "f"] +[6.869664, "o", " "] +[7.493448, "o", "\b \b"] +[7.874738, "o", "="] +[8.588815, "o", "/"] +[8.637697, "o", "d"] +[8.738769, "o", "e"] +[8.938278, "o", "v"] +[8.984739, "o", "/"] +[9.500486, "o", "$"] +[9.669718, "o", "$"] +[9.933013, "o", "/"] +[10.500669, "o", "m"] +[10.616163, "o", "e"] +[10.693377, "o", "m"] +[10.978277, "o", " "] +[11.216909, "o", "o"] +[11.376459, "o", "f"] +[11.551348, "o", "="] +[12.560851, "o", "v"] +[12.764026, "o", "d"] +[12.85403, "o", "."] +[13.112242, "o", "s"] +[13.185422, "o", "o"] +[13.657544, "o", " "] +[15.232911, "o", "s"] +[15.352051, "o", "k"] +[15.564274, "o", "i"] +[15.669058, "o", "p"] +[16.316486, "o", "="] +[17.003593, "o", "$"] +[18.445474, "o", "("] +[18.601053, "o", "("] +[19.54437, "o", "0"] +[19.660685, "o", "x"] +[20.138912, "o", "b"] 
+[20.616486, "o", "f"] +[21.48522, "o", "\b \b"] +[22.515307, "o", "7"] +[23.508009, "o", "f"] +[23.790619, "o", "e"] +[24.400251, "o", "1"] +[25.248207, "o", ")"] +[25.375563, "o", ")"] +[26.327645, "o", " "] +[26.726994, "o", "c"] +[26.847455, "o", "o"] +[27.02857, "o", "u"] +[27.276381, "o", "n"] +[27.383646, "o", "t"] +[27.625756, "o", "="] +[27.920608, "o", "1"] +[28.521296, "o", " "] +[29.112339, "o", "b"] +[29.222891, "o", "s"] +[29.743661, "o", "="] +[30.384493, "o", "4"] +[30.485127, "o", "0"] +[30.695845, "o", "9"] +[31.075003, "o", "6"] +[32.188246, "o", "\r\n"] +[32.199355, "o", "dd: "] +[32.19954, "o", "failed to open '/dev/885/mem'"] +[32.199693, "o", ": No such file or directory"] +[32.199842, "o", "\r\n"] +[32.201711, "o", "root@qemux86:~# "] +[33.598384, "o", "dd if=/dev/$$/mem of=vd.so skip=$((0xb7fe1)) count=1 bs=4096"] +[34.061345, "o", "\b"] +[34.562175, "o", "\b"] +[34.593086, "o", "\b"] +[34.624228, "o", "\b"] +[34.655779, "o", "\b"] +[34.686158, "o", "\b"] +[34.718106, "o", "\b"] +[34.747207, "o", "\b"] +[34.777381, "o", "\b"] +[34.808345, "o", "\b"] +[34.838882, "o", "\b"] +[34.87023, "o", "\b"] +[34.901235, "o", "\b"] +[34.931911, "o", "\b"] +[34.962706, "o", "\b"] +[34.994145, "o", "\b"] +[35.025857, "o", "\b"] +[35.055689, "o", "\b"] +[35.086722, "o", "\b"] +[35.117042, "o", "\b"] +[35.148358, "o", "\b"] +[35.179121, "o", "\b"] +[35.210605, "o", "\b"] +[35.240516, "o", "\b"] +[35.271948, "o", "\b"] +[35.30293, "o", "\b"] +[35.334901, "o", "\b"] +[35.364321, "o", "\b"] +[35.396241, "o", "\b"] +[35.426037, "o", "\b"] +[35.456994, "o", "\b"] +[35.488153, "o", "\b"] +[35.519142, "o", "\b"] +[35.550182, "o", "\b"] +[35.581047, "o", "\b"] +[35.611116, "o", "\b"] +[35.641763, "o", "\b"] +[35.672908, "o", "\b"] +[35.703498, "o", "\b"] +[35.734998, "o", "\b"] +[35.766411, "o", "\b"] +[35.796549, "o", "\b"] +[35.82753, "o", "\b"] +[36.601742, "o", "\b"] +[37.09839, "o", "\b"] +[37.128155, "o", "\b"] +[37.163111, "o", "\b"] +[37.192346, "o", "\b"] 
+[37.223703, "o", "\b"] +[37.253359, "o", "\b"] +[37.284626, "o", "\b"] +[37.608855, "o", "v"] +[37.939538, "o", "\b\u001b[P"] +[38.087974, "o", "\b\u001b[P"] +[38.254754, "o", "\b\u001b[P"] +[38.440987, "o", " /$$/mem of=vd.so skip=$((0xb7fe1)) count=1 bs=4096 \u001b[15;24Hp"] +[38.558039, "o", " /$$/mem of=vd.so skip=$((0xb7fe1)) count=1 bs=4096 \u001b[15;25Hr"] +[38.691276, "o", " /$$/mem of=vd.so skip=$((0xb7fe1)) count=1 bs=4096 \u001b[15;26Ho"] +[38.847178, "o", " /$$/mem of=vd.so skip=$((0xb7fe1)) count=1 bs=4096 \u001b[15;27Hc"] +[39.508168, "o", "\r\n"] +[39.523093, "o", "dd: "] +[39.52328, "o", "/proc/885/mem: cannot skip to specified offset"] +[39.523317, "o", "\r\n"] +[39.52466, "o", "1+0 records in"] +[39.524744, "o", "\r\n"] +[39.524825, "o", "1+0 records out"] +[39.525076, "o", "\r\n"] +[39.525484, "o", "4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00261601 s, 1.6 MB/s"] +[39.525575, "o", "\r\n"] +[39.527474, "o", "root@qemux86:~# "] +[41.610174, "o", "n"] +[41.732264, "o", "m"] +[41.988107, "o", " "] +[42.33858, "o", "-"] +[42.79556, "o", "D"] +[43.104068, "o", " "] +[43.291478, "o", "v"] +[43.55666, "o", "d"] +[43.843116, "o", "."] +[44.185221, "o", "s"] +[44.542652, "o", "o"] +[45.448387, "o", "\r\n"] +[45.491518, "o", "00000000 A LINUX_2.5\r\n"] +[45.49171, "o", "00000000 A LINUX_2.6"] +[45.491843, "o", "\r\n"] +[45.491964, "o", "00000b4c T __kernel_rt_sigreturn"] +[45.492082, "o", "\r\n"] +[45.492159, "o", "00000b40 T __kernel_sigreturn"] +[45.492224, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[45.492353, "o", "00000b2c T __kernel_vsyscall"] +[45.492598, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[45.49281, "o", "00000710 T __vdso_clock_gettime"] +[45.492887, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[45.493019, "o", "000009a0 T __vdso_gettimeofday"] +[45.493104, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[45.493252, "o", "00000b00 T __vdso_time"] +[45.493323, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[45.495654, "o", "root@qemux86:~# "] +[47.899977, "o", "o"] 
+[48.346053, "o", "b"] +[48.401345, "o", "j"] +[48.663622, "o", "d"] +[48.966338, "o", "u"] +[49.273586, "o", "m"] +[49.392373, "o", "p"] +[49.52031, "o", " "] +[49.715854, "o", "-"] +[50.017434, "o", "d"] +[50.916108, "o", "r"] +[51.076094, "o", " "] +[51.470914, "o", ">"] +[51.659245, "o", " "] +[52.330585, "o", "v"] +[52.682659, "o", "d"] +[53.100659, "o", "s"] +[53.21442, "o", "o"] +[53.522123, "o", "."] +[55.809864, "o", "s"] +[57.590678, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[57.633021, "o", "objdump: "] +[57.633213, "o", "'a.out': No such file"] +[57.633325, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[57.635624, "o", "root@qemux86:~# "] +[59.8835, "o", "objdump -dr > vdso.s"] +[60.157193, "o", "\b"] +[60.660172, "o", "\b"] +[60.689762, "o", "\b"] +[60.722033, "o", "\b"] +[60.752467, "o", "\b"] +[60.783825, "o", "\b"] +[60.813211, "o", "\b"] +[60.845191, "o", "\b"] +[60.875427, "o", "\b"] +[60.905546, "o", "\b"] +[61.100984, "o", "r"] +[61.244992, "o", " "] +[61.660944, "o", " > vdso.s \u001b[24;29Hv"] +[61.996232, "o", " > vdso.s \u001b[24;30Hd"] +[62.139048, "o", " > vdso.s \u001b[24;31H."] +[62.350791, "o", " > vdso.s \u001b[24;32Hs"] +[62.450021, "o", " > vdso.s \u001b[24;33Ho"] +[62.598679, "o", " > vdso.s \u001b[24;34H "] +[62.933485, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[62.974853, "o", "root@qemux86:~# "] +[66.634044, "o", "v"] +[66.745211, "o", "i"] +[66.942786, "o", " "] +[67.232609, "o", "v"] +[67.824023, "o", "d"] +[68.377726, "o", "s"] +[68.437403, "o", "o"] +[68.723205, "o", "."] +[69.712646, "o", "s"] +[69.919804, "o", "\r\n\u001b[23;80H \u001b[24;1H"] +[69.956533, "o", "\u001b[1;1H\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\n\u001b[K\u001b[1;1H\u001b[K\nvd.so: file format elf32-i386\u001b[5;1HDisassembly of section .text:\u001b[7;1H000006d0 
<__vdso_clock_gettime@@LINUX_2.6-0x40>:\u001b[8;2H6d0: 55 push %ebp\u001b[9;2H6d1: 89 e5 mov %esp,%ebp\u001b[10;2H6"] +[69.956753, "o", "d3: 53 push %ebx\u001b[11;2H6d4: e8 49 04 00 00 call b22 <__vdso_time@@LINUX_2.6+0x22>\u001b[12;2H6d9: 81 c3 6b fc ff ff add $0xfffffc6b,%ebx\u001b[13;2H6df: 0f ae e8 lfence\u001b[14;2H6e2: 0f 31 rdtsc\u001b[15;2H6e4: 89 c1 mov %eax,%ecx\u001b[16;2H6e6: 8b 83 44 cd ff ff mov -0x32bc(%ebx),%eax\u001b[17;2H6ec: 8b 9b 48 cd ff ff mov -0x32b8(%ebx),%ebx\u001b[18;2H6f2: 39 d3 cmp %edx,%ebx\u001b[19;2H6f4: 72 0e jb 704 \u001b[20;2H6f6: 76 08 jbe 700 \u001b[21;2H6f8: 89 da mov %ebx,%edx\u001b[22;2H6fa: 5b pop %ebx\u001b[23;2H6fb: 5d pop %ebp\u001b[1;1H\u001b[24;1H\u001b[K- vdso.s 1/413 0%\u001b[1;1H"] +[70.48049, "o", "\u001b[24;1H\u001b[K/"] +[71.703477, "o", "v"] +[71.956563, "o", "s"] +[72.061476, "o", "y"] +[72.151126, "o", "s"] +[72.345804, "o", "c"] +[72.43538, "o", "a"] +[72.532243, "o", "l"] +[72.655082, "o", "l"] +[72.868018, "o", "\u001b[1;1H\u001b[1;2H82e: 3d ff c9 9a 3b cmp $0x3b9ac9ff,%eax\u001b[2;1H 833: 77 eb ja 820 <__vdso_cloc"] +[72.868164, "o", "k_gettime@@LINUX_2.6+0x11\u001b[2;80H\u001b[3;2H835: 8b 75 f0 mov -0x10(%ebp),%esi\u001b[4;2H838: 01 0f add %ecx,(%edi)\u001b[5;1H 83a: 89 47 04 mov %eax,0x4(%edi)\u001b[6;2H83d: 85 f6 test %esi,%esi"] +[72.868249, "o", "\u001b[7;1H 83f: 0f 85 24 ff ff ff jne 769 <__vdso_clock_gettime@@LINUX_2.6+0x59\u001b[7;80H\u001b[8;2H845: 89 f9 mov "] +[72.86854, "o", " %edi,%ecx\u001b[9;2H847: b8 09 01 00 00 mov $0x109,%eax\u001b[10;2H84c: 89 da mov %ebx,%edx\u001b[11;2H84e: 8b 5d 08 mov 0x8(%ebp),%ebx \u001b[12;2H851: e8 d6 02 00 00 call b2c <__kernel_vsyscall@@LINUX_2.5>\u001b[13;2H856: 89 d3 mov %edx,%ebx\u001b[14;2H858: 83 c4 10 add $0x10,%esp\u001b[15;2H85b: 5b pop %ebx \u001b[16;2H85c: 5e pop %esi "] +[72.868973, "o", "\u001b[17;2H85d: 5f pop %edi \u001b[18;2H85e: 5d pop %ebp \u001b[19;2H85f: c3 ret \u001b[20;2H860: 8b 45 08 mov 0x8(%ebp),%eax \u001b[21;2H863: 85 c0 test 
%eax,%ea\u001b[22;2H865: 75 de jne 845 <__vdso_clock_gettime@@LINUX_2.6+0x13\u001b[22;80H\u001b[23;2H867: 89 7d 0c mov %edi,0xc(%ebp)\u001b[12;54H\u001b[24;1H\u001b[K- vdso.s 135/413 32%\u001b[12;54H"] +[73.659739, "o", "\u001b[24;1H\u001b[K/"] +[73.751232, "o", "\u001b[12;54H\u001b[1;2Hac2: e9 fe fe ff ff jmp 9c5 <__vdso_gettimeofday@@LINUX_2.6+0x25>\u001b[1;80H\u001b[2;2Hac7: 8b 97 84 cd ff ff mov -0x327c(%edi),%edx \u001b[2;80H\u001b[3;2Hacd: 8b 5d 0c mov 0xc(%ebp),%ebx \u001b[4;2Had0: 89 13 mov %edx,(%ebx\u001b[5;2Had2: 8b 97 88 cd ff ff mov -0x3278(%edi),%edx\u001b[6;2Had8: 89 53 04 "] +[73.751392, "o", " mov %edx,0x4(%ebx)\u001b[7;2Hadb: eb 95 jmp a72 <__vdso_gettimeofday@@LINUX_2.6+0xd2>\u001b[7;80H\u001b[8;2Hadd: b8 4e 00 00 00 mov $0x4e,%eax\u001b[9;2Hae2: 8b 4d 0c mov 0xc(%ebp),%ecx\u001b[10;2Hae5\u001b[11;2Hae7: 89 f3 mov %esi,%ebx \u001b[12;2Hae9: e8 3e 00\u001b[13;2Haee\u001b[14;2Haf0: e9 7d ff ff ff jmp a72 <__vdso_gettimeofday@@LINUX_2.6+0xd2>\u001b[14;80H\u001b[15;2Haf5: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi\u001b[16;2Haf9: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi\u001b[17;2H \u001b[18;1H00000b00 <__vdso_time@@LINUX_2.6>: \u001b[19;2Hb00: 55 push %ebp\u001b[20;2Hb01: e8 18 00 00 00 call b1e <__vdso_time@@LINUX_2.6+0x1e>\u001b[21;2Hb06: 05 3e f8 ff ff add $0xfffff83e,%eax\u001b[22;2Hb0b: 89 e5 mov %esp,%ebp \u001b[22;80H\u001b[23;2Hb0d: 8b 55 08 mov 0x8(%ebp),%edx\u001b[12;54H\u001b[24;1H\u001b[K- vd"] +[73.751422, "o", "so.s 351/413 84%\u001b[12;54H"] +[74.638384, "o", "\u001b[24;1H\u001b[K/"] +[74.706985, "o", "\u001b[12;54H\u001b[1;2Hb1c: 5d pop %ebp \u001b[1;80H\u001b[2;2Hb1d: c3 ret \u001b[3;2Hb1e: 8b 04 24 mov (%esp),%eax \u001b[4;2Hb21: c3 ret "] +[74.707435, "o", " \u001b[5;2Hb22: 8b 1c 24 mov (%esp),%ebx \u001b[6;2Hb25: c3 ret \u001b[7;2Hb26: 8b 3c 24 mov (%esp),%edi \u001b[7;80H\u001b[8;2Hb29: c3 ret \u001b[9;2Hb2a: 90 nop \u001b[10;2Hb2b: 90 nop \u001b[11;2H \u001b[12;1H00000b2c <__kernel_vsyscall@@LINUX_2.5>: \u001b[13;2Hb2c: 51 
push %ecx \u001b[14;2Hb2d: 52 push %edx \u001b[14;80H\u001b[15;2Hb2e: 55 push %ebp \u001b[16;2Hb2f: 89 e5 mov %esp,%ebp \u001b[17;2Hb31: 0f 34 sysenter\u001b[18;1H b33: cd 80 int $0x80"] +[74.707709, "o", "\u001b[19;3H35: 5d pop \u001b[20;3H36: 5a pop %edx \u001b[21;3H37: 59 pop %ecx \u001b[22;3H38: c3 ret \u001b[23;3H39: 90 nop \u001b[12;20H\u001b[24;1H\u001b[K- vdso.s 378/413 91%\u001b[12;20H"] +[75.741371, "o", "\u001b[13;20H\u001b[24;1H\u001b[K- vdso.s 379/413 91%\u001b[13;20H"] +[76.240657, "o", "\u001b[14;20H\u001b[24;1H\u001b[K- vdso.s 380/413 92%\u001b[14;20H"] +[76.271822, "o", "\u001b[15;20H\u001b[24;1H\u001b[K- vdso.s 381/413 92%\u001b[15;20H"] +[76.303272, "o", "\u001b[16;20H\u001b[24;1H\u001b[K- vdso.s 382/413 92%\u001b[16;20H"] +[76.335376, "o", "\u001b[17;20H\u001b[24;1H\u001b[K- vdso.s 383/413 92%\u001b[17;20H"] +[76.365685, "o", "\u001b[18;20H\u001b[24;1H\u001b[K- vdso.s 384/413 92%\u001b[18;20H"] +[76.396024, "o", "\u001b[19;20H\u001b[24;1H\u001b[K- vdso.s 385/413 93%\u001b[19;20H"] +[76.426024, "o", "\u001b[20;20H\u001b[24;1H\u001b[K- vdso.s 386/413 93%\u001b[20;20H"] +[76.633377, "o", "\u001b[21;20H\u001b[24;1H\u001b[K- vdso.s 387/413 93%\u001b[21;20H"] +[76.801144, "o", "\u001b[22;20H\u001b[24;1H\u001b[K- vdso.s 388/413 93%\u001b[22;20H"] +[76.95044, "o", "\u001b[23;20H\u001b[24;1H\u001b[K- vdso.s 389/413 94%\u001b[23;20H"] +[77.101372, "o", "\u001b[1;4Hd: c3 ret \u001b[2;4He: 8b 04 24 mov (%esp),%eax\u001b[3;3H21: c3 ret \u001b[4;4H2: 8b 1c 24 mov (%esp),%ebx\u001b[5;4H5: c3 ret "] +[77.10181, "o", " \u001b[6;4H6: 8b 3c 24 mov (%esp),%edi\u001b[7;4H9: c3 ret \u001b[8;4Ha: 90 nop\u001b[9;4Hb\u001b[10;2H \u001b[11;1H00000b2c <__kernel_vsyscall@@LINUX_2.5>:\u001b[12;1H b2c: 51 push %ecx\u001b[13;4Hd: 52 push %ed\u001b[14;4He: 55 push %ebp\u001b[15;4Hf: 89 e5 mov %esp,%ebp\u001b[16;3H31: 0f 34 sysenter \u001b[17;4H3: cd 80 int $0x80\u001b[18;4H5: 5d pop %ebp \u001b[19;4H6: 5a pop %edx\u001b[20;4H7: 59 pop %ec\u001b[21;4H8: c3 ret \u001b[22;4H9: 90 
nop\u001b[23;4Ha\u001b[23;20H\u001b[24;1H\u001b[K- vdso.s 390/413 94%\u001b[23;20H"] +[77.455803, "o", "\u001b[1;4He: 8b 04 24 mov (%esp),%eax\u001b[2;3H21: c3 ret \u001b[3;4H2: 8b 1c 24 mov (%esp),%ebx\u001b[4;4H5: c3 ret \u001b[5;4H6: 8b 3c 24 mov (%esp),%edi\u001b[6;4H9: c3 ret \u001b[7;4Ha: 90 nop\u001b[8;4Hb\u001b[9;2H \u001b[10;1H00000b2c <__kernel_vsyscall@@LINUX_2.5>:\u001b[11;1H b2c: 51 push %ecx\u001b[12;4H"] +[77.455926, "o", "d: 52 push %ed\u001b[13;4He: 55 push %ebp\u001b[14;4Hf: 89 e5 mov %esp,%ebp\u001b[15;3H31: 0f 34 sysenter \u001b[16;4H3: cd 80 int $0x80\u001b[17;4H5: 5d pop %ebp \u001b[18;4H6: 5a pop %edx\u001b[19;4H7: 59 pop %ec\u001b[20;4H8: c3 ret \u001b[21;4H9: 90 nop\u001b[22;4Ha\u001b[23;4Hb\u001b[23;20H\u001b[24;1H\u001b[K- vdso.s 391/413 94%\u001b[23;20H"] +[85.076246, "o", "\u0007\u001b[24;1H\u001b[K- vdso.s 391/413 94%\u001b[23;20H"] +[85.088716, "o", "\u001b[24;1H\u001b[K:"] +[85.733973, "o", "q"] +[86.200246, "o", "!"] +[86.579774, "o", "\u001b[23;20H\u001b[24;1H\u001b[K- vdso.s 391/413 94%\u001b[23;20H"] +[86.580204, "o", "\u001b[24;1H\u001b[K"] +[86.586893, "o", "root@qemux86:~# "] +[87.180212, "o", "\u001b[0m\u001b(B\u001b[7m\r\u001b[K\u001b[?12l\u001b[?25h"] +[87.180527, "o", "\u001b[?25lCTRL-A Z for help | 115200 8N1 | NOR | Minicom 2.7 | VT102 | Offline | rial.pts\u001b[?12l\u001b[?25h\u001b[24;17H"] +[87.375774, "o", "\u001b[8;30H\u001b[?25l\u001b[0m\u001b(B+----------------------+\u001b[9;30H| Leave Minicom? 
|\u001b[10;30H| No |\u001b[11;30H+----------------------+\u001b[10;51H\u001b[?25l\u001b[10;33H\u001b[0m\u001b(B\u001b[7m Yes "] +[87.691546, "o", "\u001b[?12l\u001b[?25h\u001b[8;1H\u001b[0m\u001b(B b2b: 90 nop \u001b[9;1H \u001b[10;1H00000b2c <__kernel_vsyscall@@LINUX_2.5>: \u001b[11;1H b2c: 51 push %ecx \u001b[24;17H\u001b[0m\u001b(B\u001b[7m"] +[87.691675, "o", "\u001b[?12l\u001b[?25h"] +[87.691981, "o", "\u001b[?12l\u001b[?25h\u001b[0m\u001b(B\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1l\u001b>\u001b[!p\u001b[?3;4l\u001b[4l\u001b>"] +[87.69329, "o", "\u001b]0;tavi@lktp: ~/src/linux/tools/labs\u0007\u001b[01;32mtavi@lktp\u001b[00m:\u001b[01;34m~/src/linux/tools/labs\u001b[00m$ "] +[88.615113, "o", "exit\r\n"] diff --git a/Documentation/teaching/res/tso.png b/Documentation/teaching/res/tso.png new file mode 100755 index 00000000000000..a43f230d4a97fa Binary files /dev/null and b/Documentation/teaching/res/tso.png differ diff --git a/Documentation/teaching/res/write.png b/Documentation/teaching/res/write.png new file mode 100644 index 00000000000000..d87abc99e511b4 Binary files /dev/null and b/Documentation/teaching/res/write.png differ diff --git a/Documentation/teaching/res/write2.png b/Documentation/teaching/res/write2.png new file mode 100644 index 00000000000000..e533a36a515b96 Binary files /dev/null and b/Documentation/teaching/res/write2.png differ diff --git a/Documentation/teaching/res/xen-overview.png b/Documentation/teaching/res/xen-overview.png new file mode 100755 index 00000000000000..9294dfba91fc51 Binary files /dev/null and b/Documentation/teaching/res/xen-overview.png differ diff --git a/Documentation/teaching/so2/assign-collaboration.rst b/Documentation/teaching/so2/assign-collaboration.rst new file mode 100644 index 00000000000000..188c18bc66e762 --- /dev/null +++ b/Documentation/teaching/so2/assign-collaboration.rst @@ -0,0 +1,144 @@ +============= +Collaboration +============= + +Collaboration is essential in open source world and we 
encourage you +to pick a team partner to work on selected assignments. + +Here is a simple guide to get you started: + +1. Use Github / Gitlab +---------------------- + +The best way to share your work inside the team is to use a version control system (VCS) +in order to track each change. Mind that you must make your repo private and only allow +read/write access rights to team members. + +2. Start with a skeleton for the assignment +------------------------------------------- + +Add `init`/`exit` functions, driver operations and global structures that your driver might need. + +.. code-block:: c + + // SPDX-License-Identifier: GPL-2.0 + /* + * uart16550.c - UART16550 driver + * + * Author: John Doe + * Author: Ionut Popescu + */ + struct uart16550_dev { + struct cdev cdev; + /*TODO */ + }; + + static struct uart16550_dev devs[MAX_NUMBER_DEVICES]; + + static int uart16550_open(struct inode *inode, struct file *file) + { + /*TODO */ + return 0; + } + + static int uart16550_release(struct inode *inode, struct file *file) + { + /*TODO */ + return 0; + } + + static ssize_t uart16550_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) + { + /*TODO */ + } + + static ssize_t uart16550_write(struct file *file, + const char __user *user_buffer, + size_t size, loff_t *offset) + { + /*TODO */ + } + + static long + uart16550_ioctl(struct file *file, unsigned int cmd, unsigned long arg) + { + /*TODO */ + return 0; + } + + static const struct file_operations uart16550_fops = { + .owner = THIS_MODULE, + .open = uart16550_open, + .release = uart16550_release, + .read = uart16550_read, + .write = uart16550_write, + .unlocked_ioctl = uart16550_ioctl + }; + + static int __init uart16550_init(void) + { + /* TODO: */ + } + + static void __exit uart16550_exit(void) + { + /* TODO: */ + } + + module_init(uart16550_init); + module_exit(uart16550_exit); + + MODULE_DESCRIPTION("UART16550 Driver"); + MODULE_AUTHOR("John Doe tema2) + Author: John Doe + Date: Mon Apr 4 
11:54:39 2022 +0300 + + uart16550: Add initial skeleton for assignment #2 + + This adds a simple skeleton file for uart16550 assignment. Notice + module init/exit callbacks and file_operations dummy implementation + for open/release/read/write/ioctl. + + Signed-off-by: John Doe + +4. Split the work inside the team +--------------------------------- + +Add `TODOs` with each team member's tasks. Try to split the work evenly. + +Before starting to code, make a plan. On top of your skeleton file, add TODOs with each member's tasks. Agree on global +structures and the overall driver design. Then start coding. + +5. Do reviews +------------- + +Create Pull Requests with your commits and go through review rounds with your team members. You can follow `How to create a PR` `video `_. + +6. Merge the work +----------------- + +The final work is the result of merging all the pull requests. Following the commit messages +one should clearly understand the progress of the code and how the work was managed inside the team. + +.. 
code-block:: console + + f5118b873294 uart16550: Add uart16550_interrupt implementation + 2115503fc3e3 uart16550: Add uart16550_ioctl implementation + b31a257fd8b8 uart16550: Add uart16550_write implementation + ac1af6d88a25 uart16550: Add uart16550_read implementation + 9f680e8136bf uart16550: Add uart16550_open/release implementation + 3c92a02cc527 uart16550: Add skeleton for SO2 assignment #2 diff --git a/Documentation/teaching/so2/assign0-kernel-api.rst b/Documentation/teaching/so2/assign0-kernel-api.rst new file mode 100644 index 00000000000000..835eccef15fa0b --- /dev/null +++ b/Documentation/teaching/so2/assign0-kernel-api.rst @@ -0,0 +1,111 @@ +========================= +Assignment 0 - Kernel API +========================= + +- Deadline: :command:`Monday, 25 March 2024, 23:59` + +Assignment's Objectives +======================= + +* getting familiar with the qemu setup +* loading/unloading kernel modules +* getting familiar with the list API implemented in the kernel +* have fun :) + +Statement +========= + +Write a kernel module called `list` (the resulting file must be called `list.ko`) which stores data (strings) +in an internal list. + +It is mandatory to use `the list API `__ +implemented in the kernel. +For details you can take a look at `the laboratory 2 `__. + +The module exports a directory named :command:`list` to procfs. The directory contains two files: + +- :command:`management`: with write-only access; is the interface for transmitting commands to the kernel module +- :command:`preview`: with read-only access; is the interface through which the internal contents of the kernel list can be viewed. + +`The code skeleton `__ implements the two procfs files. +You will need to create a list and implement support for `adding` and `reading` data. Follow the TODOs in the code for details. 
+ +To interact with the kernel list, you must write commands (using the `echo` command) in the `/proc/list/management` file: + +- `addf name`: adds the `name` element to the top of the list +- `adde name`: adds the `name` element to the end of the list +- `delf name`: deletes the first occurrence of the `name` element from the list +- `dela name`: deletes all occurrences of the `name` element in the list + +Viewing the contents of the list is done by viewing the contents of the `/proc/list/preview` file (use the `cat` command). +The format contains one element on each line. + +Testing +======= + +In order to simplify the assignment evaluation process, but also to reduce the mistakes of the submitted assignments, +the assignment evaluation will be done automatically with the help of a +`test script `__ called `_checker`. +The test script assumes that the kernel module is called `list.ko`. + +QuickStart +========== + +It is mandatory to start the implementation of the assignment from the code skeleton found in the `list.c `__ file. +You should follow the instructions in the `README.md file `__ of the `assignment's repo `__. + +Tips +---- + +To increase your chances of getting the highest grade, read and follow the Linux kernel +coding style described in the `Coding Style document `__. + +Also, use the following static analysis tools to verify the code: + +- checkpatch.pl + +.. code-block:: console + + $ linux/scripts/checkpatch.pl --no-tree --terse -f /path/to/your/list.c + +- sparse + +.. code-block:: console + + $ sudo apt-get install sparse + $ cd linux + $ make C=2 /path/to/your/list.c + +- cppcheck + +.. code-block:: console + + $ sudo apt-get install cppcheck + $ cppcheck /path/to/your/list.c + +Penalties +--------- +Information about assignment penalties can be found on the +`General Directions page `__. 
+ +In exceptional cases (the assignment passes the tests without complying with the requirements) +and if the assignment does not pass all the tests, the grade may decrease more than mentioned above. + +Submitting the assignment +------------------------- + +The assignment will be graded automatically using the `vmchecker-next `__ infrastructure. +The submission will be made on moodle on the `course's page `__ to the related assignment. +You will find the submission details in the `README.md file `__ of the `repo `__. + +Resources +========= + +We recommend that you use gitlab to store your homework. Follow the directions in +`README.md file `__. + +Questions +========= + +For questions about the topic, you can consult the mailing `list archives `__ +or you can write a question on the dedicated Teams channel. diff --git a/Documentation/teaching/so2/assign1-kprobe-based-tracer.rst b/Documentation/teaching/so2/assign1-kprobe-based-tracer.rst new file mode 100644 index 00000000000000..c419a0fbf3aefb --- /dev/null +++ b/Documentation/teaching/so2/assign1-kprobe-based-tracer.rst @@ -0,0 +1,182 @@ +================================== +Assignment 1 - Kprobe based tracer +================================== + +- Deadline: :command:`Monday, 8 April 2024, 23:59` + +Assignment's Objectives +======================= + +* gaining knowledge related to the instrumentation of functions in the Linux kernel (``kretprobes`` mechanism) +* gaining knowledge regarding the ``/proc`` file system from the Linux kernel +* get familiar with data structures specific to the Linux kernel (``hash table`` and ``list``) + +Statement +========= + +Build a kernel operations surveillant. + +With this surveillant, we aim to intercept: + +* ``kmalloc`` and ``kfree`` calls +* ``schedule`` calls +* ``up`` and ``down_interruptible`` calls +* ``mutex_lock`` and ``mutex_unlock`` calls + +The surveillant will hold, at the process level, the number of calls for each of the above functions. 
+For the ``kmalloc`` and ``kfree`` calls the total quantity of allocated and deallocated memory will be +shown. + +The surveillant will be implemented as a kernel module with the name ``tracer.ko``. + +Implementation details +---------------------- + +The interception will be done by recording a sample (``kretprobe``) for each of the above functions. The +surveillant will retain a list/hashtable with the monitored processes and will account for +the above information for these processes. + +For the control of the list/hashtable with the monitored processes, a char device called ``/dev/tracer`` +will be used, with major `10` and minor `42`. It will expose an ``ioctl`` interface with two arguments: + +* the first argument is the request to the monitoring subsystem: + + * ``TRACER_ADD_PROCESS`` + * ``TRACER_REMOVE_PROCESS`` + +* the second argument is the PID of the process for which the monitoring request will be executed + +In order to create a char device with major `10` you will need to use the `miscdevice `__ interface in the kernel. +Definitions of related macros can be found in the `tracer.h header `__. + +Since the ``kmalloc`` function is inline for instrumenting the allocated amount of memory, the ``__kmalloc`` +function will be inspected as follows: + +* a ``kretprobe`` will be used, which will retain the amount of memory allocated and the address of the allocated memory area. +* the ``.entry_handler`` and ``.handler`` fields in the ``kretprobe`` structure will be used to retain information about the amount of memory allocated and the address from which the allocated memory starts. + +.. 
code-block:: C + + static struct kretprobe kmalloc_probe = { + .entry_handler = kmalloc_probe_entry_handler, /* entry handler */ + .handler = kmalloc_probe_handler, /* return probe handler */ + .maxactive = 32, + }; + +Since the ``kfree`` function only receives the address of the memory area to be freed, in order to determine +the total amount of memory freed, we will need to determine its size based on the address of the area. +This is possible because there is an address-size association made when inspecting the ``__kmalloc`` function. + +For the rest of the instrumentation functions it is enough to use a ``kretprobe``. + +.. code-block:: C + + static struct kretprobe up_probe = { + .entry_handler = up_probe_handler, + .maxactive = 32, + }; + +The virtual machine kernel has the ``CONFIG_DEBUG_LOCK_ALLOC`` option enabled where the ``mutex_lock`` symbol +is a macro that expands to ``mutex_lock_nested``. Thus, in order to obtain information about the ``mutex_lock`` +function you will have to instrument the ``mutex_lock_nested`` function. + +Processes that have been added to the list/hashtable and that end their execution will be removed +from the list/hashtable. Also, a process will be removed from the dispatch list/hashtable following +the ``TRACER_REMOVE_PROCESS`` operation. + +The information retained by the surveillant will be displayed via the procfs file system, in the ``/proc/tracer`` file. +For each monitored process an entry is created in the ``/proc/tracer`` file having as first field the process PID. +The entry will be read-only, and a read operation on it will display the retained results. An example of +displaying the contents of the entry is: + +.. 
code-block:: console + + $ cat /proc/tracer + PID kmalloc kfree kmalloc_mem kfree_mem sched up down lock unlock + 42 12 12 2048 2048 124 2 2 9 9 + 1099 0 0 0 0 1984 0 0 0 0 + 1244 0 0 0 0 1221 100 1023 1023 1002 + 1337 123 99 125952 101376 193821 992 81921 7421 6392 + +Testing +======= + +In order to simplify the assignment evaluation process, but also to reduce the mistakes of the submitted assignments, +the assignment evaluation will be done automatically with the help of a +`test script `__ called `_checker`. +The test script assumes that the kernel module is called `tracer.ko`. + +QuickStart +========== + +It is mandatory to start the implementation of the assignment from the code skeleton found in the `src `__ directory. +There is only one header in the skeleton called `tracer.h `__. +You will provide the rest of the implementation. You can add as many ``*.c`` sources and additional ``*.h`` headers as you need. +You should also provide a Kbuild file that will compile the kernel module called `tracer.ko`. +Follow the instructions in the `README.md file `__ of the `assignment's repo `__. + + +Tips +---- + +To increase your chances of getting the highest grade, read and follow the Linux kernel +coding style described in the `Coding Style document `__. + +Also, use the following static analysis tools to verify the code: + +- checkpatch.pl + +.. code-block:: console + + $ linux/scripts/checkpatch.pl --no-tree --terse -f /path/to/your/tracer.c + +- sparse + +.. code-block:: console + + $ sudo apt-get install sparse + $ cd linux + $ make C=2 /path/to/your/tracer.c + +- cppcheck + +.. code-block:: console + + $ sudo apt-get install cppcheck + $ cppcheck /path/to/your/tracer.c + +Penalties +--------- + +Information about assignment penalties can be found on the +`General Directions page `__. 
In addition, the following +elements will be taken into account: + +* *-2*: missing proper disposal of resources (``kretprobes``, entries in ``/proc``) +* *-2*: data synchronization issues for data used by multiple executing instances (e.g. the list/hashtable) + +In exceptional cases (the assignment passes the tests without complying with the requirements) +and if the assignment does not pass all the tests, the grade may decrease more than mentioned above. + +Submitting the assignment +------------------------- + +The assignment will be graded automatically using the `vmchecker-next `__ infrastructure. +The submission will be made on moodle on the `course's page `__ to the related assignment. +You will find the submission details in the `README.md file `__ of the `repo `__. + + +Resources +========= + +* `Documentation/kprobes.txt `__ - description of the ``kprobes`` subsystem from Linux kernel sources. +* `samples/kprobes/ `__ - some examples of using ``kprobes`` from Linux kernel sources. + +We recommend that you use gitlab to store your homework. Follow the directions in +`README `__. + +Questions +========= + +For questions about the topic, you can consult the mailing `list archives `__ +or you can write a question on the dedicated Teams channel. 
diff --git a/Documentation/teaching/so2/assign2-driver-uart.rst b/Documentation/teaching/so2/assign2-driver-uart.rst new file mode 100644 index 00000000000000..0622965b4c8240 --- /dev/null +++ b/Documentation/teaching/so2/assign2-driver-uart.rst @@ -0,0 +1,152 @@ +========================== +Assignment 2 - Driver UART +========================== + +- Deadline: :command:`Monday, 22 April 2024, 23:59` +- The assigment is individual + +Assignment's Objectives +======================= + +* consolidating the knowledge of device drivers +* read hardware documentation and track the desired functionality in the documentation +* work with interrupts; use of non-blocking functions in interrupt context +* use of buffers; synchronization +* kernel modules with parameters + +Statement +========= + +Write a kernel module that implements a driver for the serial port (`UART16550`). +The device driver must support the two standard serial ports in a PC, `COM1` and `COM2` (`0x3f8` and `0x2f8`, +in fact the entire range of `8` addresses `0x3f8-0x3ff` and `0x2f8-0x2ff` specific to the two ports). +In addition to the standard routines (`open`, `read`, `write`, `close`), +the driver must also have support for changing communication parameters using an `ioctl` operation (`UART16550_IOCTL_SET_LINE`). + +The driver must use interrupts for both reception and transmission to reduce latency and CPU usage time. +`Read` and `write` calls must also be blocking. :command:`Assignments that do not meet these requirements will not be considered.` +It is recommended that you use a buffer for the read routine and another buffer for the write routine for each serial port in the driver. + +A blocking read call means that the read routine called from the user-space will be blocked until :command:`at least` one byte is read +(the read buffer in the kernel is empty and no data can be read). 
+A blocking write call means that the write routine called from the user-space will be blocked until :command:`at least` one byte is written +(the write buffer in the kernel is full and no data can be written). + +Buffers Scheme +-------------- + +.. image:: ../img/buffers-scheme.png + +Data transfer between the various buffers is a `Producer-Consumer `__ problem. Example: + +- The process is the producer and the device is the consumer if it is written from the process to the device; the process will block until there is at least one free space in the consumer's buffer + +- The process is the consumer and the device is the producer if it is read from a process from the device; the process will block until there is at least one element in the producer's buffer. + +Implementation Details +====================== + +- the driver will be implemented as a kernel module named :command:`uart16550.ko` +- the driver will be accessed as a character device driver, with different functions depending on the parameters transmitted to the load module: + + - the `major` parameter will specify the major with which the device must be registered + - the `option` parameter will specify how it works: + + - OPTION_BOTH: will also register COM1 and COM2, with the major given by the `major` parameter and the minors 0 (for COM1) and 1 (for COM2); + - OPTION_COM1: will only register COM1, with the major `major` and minor 0; + - OPTION_COM2: will only register COM2, with the major `major` and minor 1; + - to learn how to pass parameters in Linux, see `tldp `__ + - the default values are `major=42` and `option=OPTION_BOTH`. 
+- the interrupt number associated with COM1 is 4 (`IRQ_COM1`) and the interrupt number associated with COM2 is 3 (`IRQ_COM2`) +- `the header `__ with the definitions needed for special operations; +- a starting point in implementing read / write routines is the `example `__ of uppercase / lowercase character device driver; the only difference is that you have to use two buffers, one for read and one for write; +- you can use `kfifo `__ for buffers; +- you do not have to use deferred functions to read / write data from / to ports (you can do everything from interrupt context); +- you will need to synchronize the read / write routines with the interrupt handling routine for the routines to be blocking; it is recommended to use `synchronization with waiting queues `__ +- In order for the assigment to work, the `default serial driver` must be disabled: + + - `cat /proc/ioports | grep serial` will detect the presence of the default driver on the regions where COM1 and COM2 are defined + - in order to deactivate it, the kernel must be recompiled, either by setting the serial driver as the module, or by deactivating it completely (this modification is already made on the virtual machine) + + - `Device Drivers -> Character devices -> Serial driver -> 8250/16550 and compatible serial support.` + +Testing +======= +In order to simplify the assignment evaluation process, but also to reduce the mistakes of the submitted assignments, +the assignment evaluation will be done automatically with the help of a +`test script `__ called `_checker`. +The test script assumes that the kernel module is called `uart16550.ko`. + +QuickStart +========== + +It is mandatory to start the implementation of the assignment from the code skeleton found in the `src `__ directory. +There is only one header in the skeleton called `uart16550.h `__. +You will provide the rest of the implementation. You can add as many `*.c`` sources and additional `*.h`` headers. 
+You should also provide a Kbuild file that will compile the kernel module called `uart16550.ko`. +Follow the instructions in the `README.md file `__ of the `assignment's repo `__. + + +Tips +---- + +To increase your chances of getting the highest grade, read and follow the Linux kernel +coding style described in the `Coding Style document `__. + +Also, use the following static analysis tools to verify the code: + +- checkpatch.pl + +.. code-block:: console + + $ linux/scripts/checkpatch.pl --no-tree --terse -f /path/to/your/uart16550.c + +- sparse + +.. code-block:: console + + $ sudo apt-get install sparse + $ cd linux + $ make C=2 /path/to/your/uart16550.c + +- cppcheck + +.. code-block:: console + + $ sudo apt-get install cppcheck + $ cppcheck /path/to/your/uart16550.c + +Penalties +--------- + +Information about assignment penalties can be found on the +`General Directions page `__. + +In exceptional cases (the assignment passes the tests without complying with the requirements) +and if the assignment does not pass all the tests, the grade may decrease more than mentioned above. + +Submitting the assignment +------------------------- + +The assignment will be graded automatically using the `vmchecker-next `__ infrastructure. +The submission will be made on moodle on the `course's page `__ to the related assignment. +You will find the submission details in the `README.md file `__ of the `repo `__. + + +Resources +========= + +- serial port documentation can be found on `tldp `__ +- `table with registers `__ +- `datasheet 16550 `__ +- `alternative documentation `__ + +We recommend that you use gitlab to store your homework. Follow the directions in +`README `__. + + +Questions +========= + +For questions about the topic, you can consult the mailing `list archives `__ +or you can write a question on the dedicated Teams channel. 
diff --git a/Documentation/teaching/so2/assign3-software-raid.rst b/Documentation/teaching/so2/assign3-software-raid.rst new file mode 100644 index 00000000000000..e0b574a8b0d298 --- /dev/null +++ b/Documentation/teaching/so2/assign3-software-raid.rst @@ -0,0 +1,174 @@ +============================ +Assignment 3 - Software RAID +============================ + +- Deadline: :command:`Thursday, 16 May 2024, 23:59` + +Implement a software RAID module that uses a logical block device that will read and write data from two physical devices, +ensuring the consistency and synchronization of data from the two physical devices. The type of RAID implemented will be similar to a `RAID 1`. + +Assignment's Objectives +======================= + +* in-depth understanding of how the I/O subsystem works. +* acquire advanced skills working with `bio` structures. +* work with the block / disk devices in the Linux kernel. +* acquire skills to navigate and understand the code and API dedicated to the I/O subsystem in Linux. + + +Statement +========= + +Write a kernel module that implements software RAID functionality. `Software RAID `__ provides an abstraction between +the logical device and the physical devices. The implementation will use `RAID scheme 1 `__. + +The virtual machine has two hard disks that will represent the physical devices: `/dev/vdb` and `/dev/vdc`. The operating system +will provide a logical device (block type) that will interface the access from the user space. Write requests to the logical device +will result in two writes, one for each hard disk. Hard disks are not partitioned. It will be considered that each hard disk has a +single partition that covers the entire disk. + +Each partition will store a sector along with an associated checksum (CRC32) to ensure error recovery. At each reading, the related +information from both partitions is read. 
If a sector of the first partition has corrupt data (CRC value is wrong) then the sector +on the second partition will be read; at the same time the sector of the first partition will be corrected. The same applies in the case of +a read of a corrupt sector on the second partition. If a sector has incorrect CRC values on both partitions, an appropriate error +code will be returned. + +Important to know +----------------- + +To ensure error recovery, a CRC code is associated with each sector. CRC codes are stored starting at byte ``LOGICAL_DISK_SIZE`` of the partition +(macro defined in the assignment `header `__). The disk structure will have the following layout: + + +.. code-block:: console + + +-----------+-----------+-----------+ +---+---+---+ + | sector1 | sector2 | sector3 |.....|C1 |C2 |C3 | + +-----------+-----------+-----------+ +---+---+---+ + +where ``C1``, ``C2``, ``C3`` are the CRC values of sectors ``sector1``, ``sector2``, ``sector3``. The CRC area is found immediately after the ``LOGICAL_DISK_SIZE`` bytes of the partition. + +As a seed for CRC use 0 (zero). 
+ +Implementation Details +====================== + +- the kernel module will be named ``ssr.ko`` +- the logical device will be accessed as a block device with the major ``SSR_MAJOR`` and minor ``SSR_FIRST_MINOR`` under the name ``/dev/ssr`` (via the macro ``LOGICAL_DISK_NAME``) +- the virtual device (``LOGICAL_DISK_NAME`` - ``/dev/ssr``) will have the capacity of ``LOGICAL_DISK_SECTORS`` (use ``set_capacity`` with the ``struct gendisk`` structure) +- the two disks are represented by the devices ``/dev/vdb``, respectively ``/dev/vdc``, defined by means of macros ``PHYSICAL_DISK1_NAME``, respectively ``PHYSICAL_DISK2_NAME`` +- to work with the ``struct block _device`` structure associated with a physical device, you can use the ``blkdev_get_by_path`` and ``blkdev_put`` functions +- for the handling of requests from the user space, we recommend not to use a ``request_queue``, but to do processing at :c:type:`struct bio` level + using the ``submit_bio`` field of :c:type:`struct block_device_operations` +- since data sectors are separated from CRC sectors you will have to build separate ``bio`` structures for data and CRC values +- to allocate a :c:type:`struct bio` for physical disks you can use :c:func:`bio_alloc`; to add data pages to bio use :c:func:`alloc_page` and :c:func:`bio_add_page` +- to free up the space allocated for a :c:type:`struct bio` you need to release the pages allocated to the bio (using the :c:func:`__free_page` macro ) and call + :c:func:`bio_put` +- when generating a :c:type:`struct bio` structure, consider that its size must be multiple of the disk sector size (``KERNEL_SECTOR_SIZE``) +- to send a request to a block device and wait for it to end, you can use the :c:func:`submit_bio_wait` function +- use :c:func:`bio_endio` to signal the completion of processing a ``bio`` structure +- for the CRC32 calculation you can use the :c:func:`crc32` macro provided by the kernel +- useful macro definitions can be found in the assignment support `header 
`__ +- a single request processing function for block devices can be active at one time in a call stack (more details `here `__). + You will need to submit requests for physical devices in a kernel thread; we recommend using ``workqueues``. +- For a quick run, use a single bio to batch send the read/write request for CRC values for adjacent sectors. For example, + if you need to send requests for CRCs in sectors 0, 1, ..., 7, use a single bio, not 8 bios. +- our recommendations are not mandatory (any solution that meets the requirements of the assignment is accepted) +Testing +======= +In order to simplify the assignment evaluation process, but also to reduce the mistakes of the submitted assignments, +the assignment evaluation will be done automatically with the help of a +`test script `__ called `_checker`. +The test script assumes that the kernel module is called `ssr.ko`. + +If, as a result of the testing process, the sectors on both disks contain invalid data, resulting in +read errors that make the module impossible to use, you will need to redo the two disks in the +virtual machine using the commands: + +.. code-block:: console + + $ dd if=/dev/zero of=/dev/vdb bs=1M + $ dd if=/dev/zero of=/dev/vdc bs=1M + +You can also get the same result using the following command to start the virtual machine: + +.. code-block:: console + + $ rm disk{1,2}.img; make console # or rm disk{1,2}.img; make boot + +QuickStart +========== + +It is mandatory to start the implementation of the assignment from the code skeleton found in the `src `__ directory. +There is only one header in the skeleton called `ssr.h `__. +You will provide the rest of the implementation. You can add as many `*.c`` sources and additional `*.h`` headers. +You should also provide a Kbuild file that will compile the kernel module called `ssr.ko`. +Follow the instructions in the `README.md file `__ of the `assignment's repo `__. 
+ + +Tips +---- + +To increase your chances of getting the highest grade, read and follow the Linux kernel +coding style described in the `Coding Style document `__. + +Also, use the following static analysis tools to verify the code: + +- checkpatch.pl + +.. code-block:: console + + $ linux/scripts/checkpatch.pl --no-tree --terse -f /path/to/your/file.c + +- sparse + +.. code-block:: console + + $ sudo apt-get install sparse + $ cd linux + $ make C=2 /path/to/your/file.c + +- cppcheck + +.. code-block:: console + + $ sudo apt-get install cppcheck + $ cppcheck /path/to/your/file.c + +Penalties +--------- + +Information about assigments penalties can be found on the +`General Directions page `__. + +In exceptional cases (the assigment passes the tests by not complying with the requirements) +and if the assigment does not pass all the tests, the grade will may decrease more than mentioned above. + +Submitting the assigment +------------------------ + +The assignment will be graded automatically using the `vmchecker-next `__ infrastructure. +The submission will be made on moodle on the `course's page `__ to the related assignment. +You will find the submission details in the `README.md file `__ of the `repo `__. + + +Resources +========= + +- implementation of the `RAID `__ software in the Linux kernel + +We recommend that you use gitlab to store your homework. Follow the directions in +`README `__. + + +Questions +========= + +For questions about the topic, you can consult the mailing `list archives `__ +or you can write a question on the dedicated Teams channel. 
+ +Before you ask a question, make sure that: + + - you have read the statement of the assigment well + - the question is not already presented on the `FAQ page `__ + - the answer cannot be found in the `mailing list archives `__ diff --git a/Documentation/teaching/so2/assign4-transport-protocol.rst b/Documentation/teaching/so2/assign4-transport-protocol.rst new file mode 100644 index 00000000000000..192dc842c35cb3 --- /dev/null +++ b/Documentation/teaching/so2/assign4-transport-protocol.rst @@ -0,0 +1,253 @@ +===================================== +Assignment 4 - SO2 Transport Protocol +===================================== + +- Deadline: :command:`Monday, 29 May 2023, 23:00` +- This assignment can be made in teams (max 2). Only one of them must submit the assignment, and the names of the student should be listed in a README file. + +Implement a simple datagram transport protocol - STP (*SO2 Transport Protocol*). + +Assignment's Objectives +======================= + +* gaining knowledge about the operation of the networking subsystem in the Linux kernel +* obtaining skills to work with the basic structures of the networking subsystem in Linux +* deepening the notions related to communication and networking protocols by implementing a protocol in an existing protocol stack + +Statement +========= + +Implement, in the Linux kernel, a protocol called STP (*SO2 Transport Protocol*), at network and transport level, that works using datagrams (it is not connection-oriented and does not use flow-control elements). + +The STP protocol acts as a Transport layer protocol (port-based multiplexing) but operates at level 3 (Network) of `the OSI stack `__, above the Data Link level. + +The STP header is defined by the ``struct stp_header`` structure: + +.. 
code-block:: c + + struct stp_header { + __be16 dst; + __be16 src; + __be16 len; + __u8 flags; + __u8 csum; + }; + + +where: + + * ``len`` is the length of the packet in bytes (including the header); + * ``dst`` and ``src`` are the destination and source ports, respectively; + * ``flags`` contains various flags, currently unused (marked *reserved*); + * ``csum`` is the checksum of the entire packet including the header; the checksum is calculated by exclusive OR (XOR) between all bytes. + +Sockets using this protocol will use the ``AF_STP`` family. + +The protocol must work directly over Ethernet. The ports used are between ``1`` and ``65535``. Port ``0`` is not used. + +The definition of STP-related structures and macros can be found in the `assignment support header `__. + +Implementation Details +====================== + +The kernel module will be named **af_stp.ko**. + +You have to define a structure of type `net_proto_family `__, which provides the operation to create STP sockets. +Newly created sockets are not associated with any port or interface and cannot receive / send packets. +You must initialize the `socket ops field `__ with the list of operations specific to the STP family. 
+This field refers to a structure `proto_ops `__ which must include the following functions: + +* ``release``: releases an STP socket +* ``bind``: associates a socket with a port (possibly also an interface) on which packets will be received / sent: + + * there may be bind sockets only on one port (not on an interface) + * sockets associated with only one port will be able to receive packets sent to that port on all interfaces (analogous to UDP sockets associated with only one port); these sockets cannot send packets because the interface from which they can be sent via the standard sockets API cannot be specified + * two sockets cannot be binded to the same port-interface combination: + + * if there is a socket already binded with a port and an interface then a second socket cannot be binded to the same port and the same interface or without a specified interface + * if there is a socket already binded to a port but without a specified interface then a second socket cannot be binded to the same port (with or without a specified interface) + + * we recommend using a hash table for bind instead of other data structures (list, array); in the kernel there is a hash table implementation in the `hashtable.h header `__ + +* ``connect``: associates a socket with a remote port and hardware address (MAC address) to which packets will be sent / received: + + * this should allow ``send`` / ``recv`` operations on the socket instead of ``sendmsg`` / ``recvmsg`` or ``sendto`` / ``recvfrom`` + * once connected to a host, sockets will only accept packets from that host + * once connected, the sockets can no longer be disconnected + +* ``sendmsg``, ``recvmsg``: send or receive a datagram on an STP socket: + + * for the *receive* part, metainformation about the host that sent the packet can be stored in the `cb field in sk_buff `__ + +* ``poll``: the default function ``datagram_poll`` will have to be used +* for the rest of the operations the predefined stubs in the kernel will have 
to be used (``sock_no_*``) + +.. code-block:: c + + static const struct proto_ops stp_ops = { + .family = PF_STP, + .owner = THIS_MODULE, + .release = stp_release, + .bind = stp_bind, + .connect = stp_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = stp_sendmsg, + .recvmsg = stp_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, + }; + +Socket operations use a type of address called ``sockaddr_stp``, a type defined in the `assignment support header `__. +For the *bind* operation, only the port and the index of the interface to which the socket is bound will be considered. +For the *receive* operation, only the ``addr`` and ``port`` fields in the structure will be filled in with the MAC address of the host that sent the packet and with the port from which it was sent. +Also, when sending a packet, the destination host will be obtained from the ``addr`` and ``port`` fields of this structure. + +You need to register a structure `packet_type `__, using the call `dev_add_pack `__ to be able to receive STP packets from the network layer. + +The protocol will need to provide an interface through the *procfs* file system for statistics on sent / received packets. +The file must be named ``/proc/net/stp_stats``, specified by the ``STP_PROC_FULL_FILENAME`` macro in the `assignment support header `__. +The format must be a simple table with ``2`` rows: on the first row the header of the table, and on the second row the statistics corresponding to the columns. +The columns of the table must be in order: + +.. 
code:: + + RxPkts HdrErr CsumErr NoSock NoBuffs TxPkts + +where: + +* ``RxPkts`` - the number of packets received +* ``HdrErr`` - the number of packets received with header errors (packets too short or with source or destination 0 ports) +* ``CsumErr`` - the number of packets received with checksum errors +* ``NoSock`` - the number of received packets for which no destination socket was found +* ``NoBuffs`` - the number of received packets that could not be received because the socket queue was full +* ``TxPkts`` - the number of packets sent + +To create or delete the entry specified by ``STP_PROC_FULL_FILENAME`` we recommend using the functions `proc_create `__ and `proc_remove `__. + +Sample Protocol Implementations +------------------------------- + +For examples of protocol implementation, we recommend the implementation of `PF_PACKET `__ sockets and the various functions in `UDP implementation `__ or `IP implementation `__. + +Testing +======= + +In order to simplify the assignment evaluation process, but also to reduce the mistakes of the submitted assignments, +the assignment evaluation will be done automatically with the help of a +`test script `__ called `_checker`. +The test script assumes that the kernel module is called `af_stp.ko`. + +tcpdump +------- + +You can use the ``tcpdump`` utility to troubleshoot sent packets. +The tests use the loopback interface; to track sent packets you can use a command line of the form: + +.. code:: console + + tcpdump -i lo -XX + +You can use a static version of `tcpdump `__. +To add to the ``PATH`` environment variable in the virtual machine, copy this file to ``/linux/tools/labs/rootfs/bin``. +Create the directory if it does not exist. Remember to give the ``tcpdump`` file execution permissions: + +.. 
code:: console + + # Connect to the docker using ./local.sh docker interactive + cd /linux/tools/labs/rootfs/bin + wget http://elf.cs.pub.ro/so2/res/teme/tcpdump + chmod +x tcpdump + +QuickStart +========== + +It is mandatory to start the implementation of the assignment from the code skeleton found in the `src `__ directory. +There is only one header in the skeleton called `stp.h `__. +You will provide the rest of the implementation. You can add as many ``*.c`` sources and additional ``*.h`` headers as you need. +You should also provide a Kbuild file that will compile the kernel module called ``af_stp.ko``. +Follow the instructions in the `README.md file `__ of the `assignment's repo `__. + + + +Tips +---- + +To increase your chances of getting the highest grade, read and follow the Linux kernel coding style described in the `Coding Style document `__. + +Also, use the following static analysis tools to verify the code: + +* checkpatch.pl + + .. code-block:: console + + $ linux/scripts/checkpatch.pl --no-tree --terse -f /path/to/your/file.c + +* sparse + + .. code-block:: console + + $ sudo apt-get install sparse + $ cd linux + $ make C=2 /path/to/your/file.c + +* cppcheck + + .. code-block:: console + + $ sudo apt-get install cppcheck + $ cppcheck /path/to/your/file.c + +Penalties +--------- + +Information about assignment penalties can be found on the `General Directions page `__. + +In exceptional cases (the assignment passes the tests by not complying with the requirements) and if the assignment does not pass all the tests, the grade may decrease more than mentioned above. + +Submitting the assignment +------------------------- + +The assignment will be graded automatically using the `vmchecker-next `__ infrastructure. +The submission will be made on Moodle on the `course's page `__ to the related assignment. +You will find the submission details in the `README.md file `__ of the `repo `__. 
+ + +Resources +========= + +* `Lecture 10 - Networking `__ +* `Lab 10 - Networking `__ +* Linux kernel sources + + * `Implementing PF_PACKET sockets `__ + * `Implementation of the UDP protocol `__ + * `Implementation of the IP protocol `__ + +* Understanding Linux Network Internals + + * chapters 8-13 + +* `assignment support header `__ + +We recommend that you use gitlab to store your homework. Follow the directions in `README `__. + +Questions +========= + +For questions about the topic, you can consult the mailing `list archives `__ +or you can write a question on the dedicated Teams channel. + +Before you ask a question, make sure that: + + - you have read the statement of the assigment well + - the question is not already presented on the `FAQ page `__ + - the answer cannot be found in the `mailing list archives `__ + diff --git a/Documentation/teaching/so2/assign5-pitix.rst b/Documentation/teaching/so2/assign5-pitix.rst new file mode 100644 index 00000000000000..ef61918bf6fa37 --- /dev/null +++ b/Documentation/teaching/so2/assign5-pitix.rst @@ -0,0 +1,231 @@ +=================================== +Assignment 5 - PITIX Filesystem +=================================== + +Deadline: :command:`Tuesday, 24 May 2022, 23:00` + +Statement +========= + +Write a kernel module to implement the **PITIX** file system, version 2. +This file system will only support files and directories. +Support operations for hard or symbolic links will not be implemented. +Also, support operations for special files (pipes, character devices, or blocks) will not be implemented. +Basically you need to implement the following: + * for directories: ``lookup``, ``unlink``, ``mkdir``, ``rmdir``, ``iterate`` + * for files: ``create``, ``truncate``, bitmap functions, see `minix_get_block `__. + +The rest of the functions either have generic kernel implementations, or you don't have to implement them. + +The disk structure of the file system is: + +.. 
code-block:: console + + +--------------+-----------+-----------+------------+-----------------------+ + | | | | | | + | superblock | imap | dmap | izone | dzone | + +--------------+-----------+-----------+------------+-----------------------+ + 4096 bytes 1 block 1 block 32 blocks 8*block_size blocks + + +where: + +* ``Superblock`` is the superblock (``4096`` bytes) +* ``Imap`` contains the bitmap of the blocks occupied by the inodes (``1`` block) +* ``Dmap`` contains the bitmap of the blocks occupied by the data (``1`` block) +* ``Izone`` contains inodes (``32`` blocks) +* ``Dzone`` contains the data (the actual contents of the files) (``8 * block_size`` blocks) + +The superblock (**on disk**) is described by the following structure: + +.. code-block:: c + + struct pitix_super_block { + unsigned long magic; + __u8 version; + __u8 block_size_bits; + __u8 imap_block; + __u8 dmap_block; + __u8 izone_block; + __u8 dzone_block; + __u16 bfree; + __u16 ffree; + }; + +where: + +* ``magic`` must be initialized with ``PITIX_MAGIC`` +* ``version`` must be initialized with ``2`` (``PITIX_VERSION``) +* ``block_size_bits`` is the block size expressed as a power-of-two exponent; the block size can be ``512``, ``1024``, ``2048``, or ``4096`` +* ``imap_block`` is the block number (relative to the device) of the bit vector used to allocate / release inodes +* ``dmap_block`` is the block number (relative to the device) for the bit vector used to allocate / release data blocks +* ``izone_block`` is the number of the first block (relative to the device) of the inode area +* ``dzone_block`` is the number of the first block (relative to the device) of the data area +* ``bfree`` is the number of free blocks (unallocated) +* ``ffree`` is the number of free (unallocated) inodes + +The inodes will be stored in the inode area and are described by the following structure: + +.. 
code-block:: c + + struct pitix_inode { + __u32 mode; + uid_t uid; + gid_t gid; + __u32 size; + __u32 time; + __u16 direct_data_blocks [INODE_DIRECT_DATA_BLOCKS]; + __u16 indirect_data_block; + }; + +where: + +* ``mode`` represents the access rights and inode type (file or directory) as represented in the kernel +* ``uid`` represents the UID as it is represented in the kernel +* ``gid`` represents the GID as it is represented in the kernel +* ``size`` is the size of the file / directory +* ``time`` represents the modification time as it is represented in the kernel +* ``direct_data_blocks`` is a vector (size ``INODE_DIRECT_DATA_BLOCKS`` ) that contains indexes of direct data blocks +* ``indirect_data_block`` is the index of a data block that contains the indexes of indirect data blocks + +The index of a data block (direct or indirect) indicates the number of that data block relative to the data area (``Dzone``). +The size of an index is ``2`` bytes. + +As can be seen from its structure, the inode uses a simple routing scheme for data blocks. +Blocks in the range ``[0, INODE_DIRECT_DATA_BLOCKS)`` are blocks of direct data and are referenced by elements of the vector ``direct_data_blocks`` and blocks in the range ``[INODE_DIRECT_DATA_BLOCKS, INODE_DIRECT_DATA_BL)`` are indirect data blocks and are referred to by indices within the data block indicated by ``indirect_data_block``. + +The data block indicated by ``indirect_data_block`` must be allocated when we have to refer to a first block of indirect data and must be released when there are no more blocks of indirect data. + +Unused indexes must be set to ``0``. +The first block, the one with index ``0``, is always allocated when formatting. 
This block cannot be used and, consequently, the value ``0``: + +* in an element of the vector ``direct_data_blocks`` means free slot (that element does not refer to a block of data directly) +* ``indirect_data_block`` means that no data block is allocated to keep track of indirect data blocks (when no indirect data blocks are needed) +* an index within the data block referred to by ``indirect_data_block`` means free slot (that index does not refer to an indirect data block) + +It is guaranteed that the number of bytes occupied by an inode on the disk is a divisor of the block size. + +Directories have a single associated block of data (referred to as ``direct_data_blocks[0]``) in which directory entries will be stored. These are described by the following structure: + +.. code-block:: c + + struct pitix_dir_entry { + __u32 ino; + char name [PITIX_NAME_LEN]; + }; + +where + +* ``ino`` is the inode number of the file or directory; this number is an index in the inode area +* ``name`` is the name of the file or directory; maximum name length is ``16`` bytes (``PITIX_NAME_LEN``); if the name length is less than 16 bytes, then the name will end with the ASCII character that has the code ``0`` (same as for strings) + +The root directory will be assigned inode ``0`` and data block ``0``. + +For simplicity, at ``mkdir`` it is not necessary to create the entries ``.`` (*dot*) and ``..`` (*dot dot*) in the new directory; the checker uses this assumption. + +All numeric values are stored on disk in CPU byte order. + +In the `assignment header Block devices -> Loopback device support`` + +In order to simplify the assignment evaluation process, but also to reduce the mistakes of the submitted assignments, the assignment evaluation will be done automatically with the help of public tests that are in the new infrastructure. + +For local testing, use the following commands: + +.. 
code-block:: console + + $ git clone https://github.com/linux-kernel-labs/linux.git + $ cd linux/tools/labs + $ LABS=assignments/5-pitix make skels + $ #the development of the assignment will be written in the 5-pitix directory + $ make build + $ make copy + $ make boot + +Instructions for using the test suite can be found in the ``README`` file. + +Tips +---- + +To increase your chances of getting the highest grade, read and follow the Linux kernel coding style described in the `Coding Style document `__. + +Also, use the following static analysis tools to verify the code: + +- checkpatch.pl + +.. code-block:: console + + $ linux/scripts/checkpatch.pl --no-tree --terse -f /path/to/your/file.c + +- sparse + +.. code-block:: console + + $ sudo apt-get install sparse + $ cd linux + $ make C=2 /path/to/your/file.c + +- cppcheck + +.. code-block:: console + + $ sudo apt-get install cppcheck + $ cppcheck /path/to/your/file.c + +Penalties +--------- + +As a more difficult assignment, it is worth 2 points. + +Information about assignment penalties can be found on the +`General Directions page `__. + +In exceptional cases (the assignment passes the tests by not complying with the requirements) +and if the assignment does not pass all the tests, the grade may decrease more than mentioned above. + +Submitting the assignment +------------------------- + +The assignment archive will be submitted to vmchecker, according to the rules on the +`rules page `__. + +In the vmchecker interface choose the ``Google Challenge - Sistem de fișiere`` option for this assignment. + +Resources +========= + +* `assignment header `__ +* `Lab 08: File system drivers (Part 1) `__ +* `Lab 09: File system drivers (Part 2) `__ +* `Minix filesystem source code `__ + +We recommend that you use GitLab to store your homework. Follow the directions in +`README `__ +and on the dedicated `Git wiki page `__. + +The resources for the assignment can also be found in the `so2-assignments `__ repo on GitHub. 
+The repo contains a `Bash script `__ +that helps you create a private repository on the faculty `GitLab `__ instance. +Follow the tips from the `README `__ and +on the dedicated `Wiki page `__. + +Questions +========= + +For questions about the assignment, you can consult the mailing `list archives `__ +or send an e-mail (you must be `registered `__). +Please read and follow `the tips for use of the list `__. + +Before you ask a question, make sure that: + +* you have read the statement of the assignment carefully +* the question is not already presented on the `FAQ page `__ +* the answer cannot be found in the `mailing list archives `__ diff --git a/Documentation/teaching/so2/assign7-kvm-vmm.rst b/Documentation/teaching/so2/assign7-kvm-vmm.rst new file mode 100644 index 00000000000000..3eb0b20c1ceb30 --- /dev/null +++ b/Documentation/teaching/so2/assign7-kvm-vmm.rst @@ -0,0 +1,295 @@ +===================================================== +Assignment 7 - SO2 Virtual Machine Manager with KVM +===================================================== + +- Deadline: :command:`Tuesday, 29 May 2023, 23:00` +- This assignment can be done in teams (max 2). Only one team member must submit the assignment, and the names of both students should be listed in a README file. + +In this assignment we will work on a simple Virtual Machine Manager (VMM). We will be using the KVM API +from the Linux kernel. + +The assignment has two components: the VM code and the VMM code. We will be using a very simple protocol +to enable the communication between the two components. The protocol is called SIMVIRTIO. + + +I. Virtual Machine Manager +========================== + +In general, to build a VMM from scratch we will have to implement three main functionalities: initialize the VMM, initialize the virtual CPU and run the guest code. We will split the implementation of the VMM in these three phases. + +1. 
Initialize the VMM +------------------------- + +A VM will be represented in general by three elements: a file descriptor used to interact with the KVM API, a file descriptor per VM used to configure it (e.g. set its memory) and a pointer to the VM's memory. We provide you with the following structure to start from when working with a VM. + +.. code-block:: c + + typedef struct vm { + int sys_fd; + int fd; + char *mem; + } virtual_machine; + + +The first step in initializing the KVM VM is to interact with the `KVM API <https://www.kernel.org/doc/html/latest/virt/kvm/api.html>`__. The KVM API is exposed via ``/dev/kvm``. We will be using ioctl calls to call the API. + +The snippet below shows how one can call ``KVM_GET_API_VERSION`` to get the KVM API version. + +.. code-block:: c + + int kvm_fd = open("/dev/kvm", O_RDWR); + if (kvm_fd < 0) { + perror("open /dev/kvm"); + exit(1); + } + + int api_ver = ioctl(kvm_fd, KVM_GET_API_VERSION, 0); + if (api_ver < 0) { + perror("KVM_GET_API_VERSION"); + exit(1); + } + +Let us now go briefly through how a VMM initializes a VM. This is only the bare bones; a VMM may do lots of other things during VM initialization. + +1. We first use ``KVM_GET_API_VERSION`` to check that we are running the expected version of KVM, ``KVM_API_VERSION``. +2. We now create the VM using ``KVM_CREATE_VM``. Note that calling ``KVM_CREATE_VM`` returns a file descriptor. We will be using this file descriptor for the next phases of the setup. +3. (Optional) On Intel based CPUs we will have to call ``KVM_SET_TSS_ADDR`` with address ``0xfffbd000`` +4. Next, we allocate the memory for the VM, we will be using ``mmap`` for this with ``PROT_WRITE``, ``MAP_PRIVATE``, ``MAP_ANONYMOUS`` and ``MAP_NORESERVE``. We recommend allocating 0x100000 bytes for the VM. +5. We flag the memory as ``MADV_MERGEABLE`` using ``madvise`` +6. Finally, we use ``KVM_SET_USER_MEMORY_REGION`` to assign the memory to the VM. 
+ +**Make sure you understand what file descriptor to use and when: we use the KVM fd when calling KVM_CREATE_VM, but when interacting with the VM, such as calling KVM_SET_USER_MEMORY_REGION, we use the VM's +file descriptor** + +TLDR: API used for VM initialization: + +* KVM_GET_API_VERSION +* KVM_CREATE_VM +* KVM_SET_TSS_ADDR +* KVM_SET_USER_MEMORY_REGION. + +2. Initialize a virtual CPU +--------------------------- + +We need a Virtual CPU (VCPU) to store registers. + +.. code-block:: c + + typedef struct vcpu { + int fd; + struct kvm_run *kvm_run; + } virtual_cpu; + +To create a virtual CPU we will do the following: +1. Call ``KVM_CREATE_VCPU`` to create the virtual CPU. This call returns a file descriptor. +2. Use ``KVM_GET_VCPU_MMAP_SIZE`` to get the size of the shared memory +3. Allocate the necessary VCPU mem size with ``mmap``. We will be passing the VCPU file descriptor to the ``mmap`` call. We can store the result in ``kvm_run``. + + +TLDR: API used for VCPU initialization: + +* KVM_CREATE_VCPU +* KVM_GET_VCPU_MMAP_SIZE + +**We recommend using 2MB pages to simplify the translation process** + +Running the VM +============== + + +Setup real mode +--------------- + +At first, the CPU will start in Protected mode. To run any meaningful code, we will switch the CPU to `Real mode <https://wiki.osdev.org/Real_Mode>`__. To do this we will +need to configure several CPU registers. + +1. First, we will use ``KVM_GET_SREGS`` to get the registers. We use ``struct kvm_sregs`` for this task. +2. We will need to set ``cs.selector`` and ``cs.base`` to 0. We will use ``KVM_SET_SREGS`` to set the registers. +3. Next we will clear all ``FLAGS`` bits via the ``rflags`` register, this means setting ``rflags`` to 2 since bit 1 must always be set to 1. We also set the ``RIP`` register to 0. + +Setup long mode +--------------- + +Real mode is all right for very simple guests, such as the one found in the folder `guest_16_bits`. 
But, +most programs nowadays need 64 bits addresses, and as such we will need to switch to long mode. The following article from OSDev presents all the necessary information about `Setting Up Long Mode <https://wiki.osdev.org/Setting_Up_Long_Mode>`__. + +In ``vcpu.h``, you may find helpful macros such as CR0_PE, CR0_MP, CR0_ET, etc. + +Since we will be running a more complex program, we will also create a small stack for our program +``regs.rsp = 1 << 20;``. Don't forget to set the RIP and RFLAGS registers. + +Running +------- + +After we setup our VCPU in real or long mode we can finally start running code on the VM. + +1. We copy to the vm memory the guest code, ``memcpy(vm->mem, guest_code, guest_code_size)``. The guest code will be available in two variables which will be discussed below. +2. In an infinite loop we run the following: + * We call ``KVM_RUN`` on the VCPU file descriptor to run the VCPU + * Through the shared memory of the VCPU we check the ``exit_reason`` parameter to see if the guest has made any requests: + * We will handle the following VMEXITs: ``KVM_EXIT_MMIO``, ``KVM_EXIT_IO`` and ``KVM_EXIT_HLT``. ``KVM_EXIT_MMIO`` is triggered when the VM writes to a MMIO address. ``KVM_EXIT_IO`` is called when the VM calls ``inb`` or ``outb``. ``KVM_EXIT_HLT`` is called when the user does a ``hlt`` instruction. + +Guest code +---------- + +The VM that is running is also called guest. We will be using the guest to test our implementation. + +1. To test the implementation before implementing SIMVIRTIO, the guest will write the value 42 at address 0x400 and in the RAX register. +2. To test a more complicated implementation, we will extend the previous program to also write "Hello, world!\n" on port `0xE9` using the `outb` instruction. +3. To test the implementation of `SIMVIRTIO`, we will + +How do we get the guest code? The guest code is available at the following static pointers guest16, guest16_end-guest16. The linker script is populating them. 
+ + +## SIMVIRTIO: +From the communication between the guest and the VMM we will implement a very simple protocol called ``SIMVIRTIO``. It's a simplified version of the real protocol used in the real world called virtio. + +Configuration space: + ++--------------+----------------+----------------+----------------+------------------+-------------+-------------+ +| u32 | u16 | u8 | u8 | u8 | u8 | u8 | ++==============+================+================+================+==================+=============+=============+ +| magic value | max queue len | device status | driver status | queue selector | Q0(TX) CTL | Q1(RX) CTL | +| R | R | R | R/W | R/W | R/W | R/w | ++--------------+----------------+----------------+----------------+------------------+-------------+-------------+ + + +Controller queues +----------------- + +We provide you with the following structures and methods for the ``SIMVIRTIO`` implementation. + +.. code-block:: c + + typedef uint8_t q_elem_t; + typedef struct queue_control { + // Ptr to current available head/producer index in 'buffer'. + unsigned head; + // Ptr to last index in 'buffer' used by consumer. + unsigned tail; + } queue_control_t; + typedef struct simqueue { + // MMIO queue control. + volatile queue_control_t *q_ctrl; + // Size of the queue buffer/data. + unsigned maxlen; + // Queue data buffer. + q_elem_t *buffer; + } simqueue_t; + int circ_bbuf_push(simqueue_t *q, q_elem_t data) + { + } + int circ_bbuf_pop(simqueue_t *q, q_elem_t *data) + { + } + + +Device structures +----------------- + +.. 
code-block:: c + + #define MAGIC_VALUE 0x74726976 + #define DEVICE_RESET 0x0 + #define DEVICE_CONFIG 0x2 + #define DEVICE_READY 0x4 + #define DRIVER_ACK 0x0 + #define DRIVER 0x2 + #define DRIVER_OK 0x4 + #define DRIVER_RESET 0x8000 + typedef struct device { + uint32_t magic; + uint8_t device_status; + uint8_t driver_status; + uint8_t max_queue_len; + } device_t; + typedef struct device_table { + uint16_t count; + uint64_t device_addresses[10]; + } device_table_t; + + +We will be implementing the following handlers: +* MMIO (read/write) VMEXIT +* PIO (read/write) VMEXIT + +Using the skeleton +================== + +Debugging +========= + + +Tasks +===== +1. 30p Implement a simple VMM that runs the code from `guest_16_bits`. We will be running the VCPU in real mode for this task +2. 20p Extend the previous implementation to run the VCPU in long mode. We will be running the `guest_32_bits` example +3. 30p Implement the `SIMVIRTIO` protocol. +4. 10p Implement polling as opposed to VMEXIT. We will use the macro `USE_POOLING` to switch this option on and off. +5. 10p Add profiling code. Measure the number of VMEXITs triggered by the VMM. + +Submitting the assignment +------------------------- + +The assignment archive will be submitted on **Moodle**, according to the rules on the `rules page `__. + + +Tips +---- + +To increase your chances of getting the highest grade, read and follow the Linux kernel coding style described in the `Coding Style document `__. + +Also, use the following static analysis tools to verify the code: + +* checkpatch.pl + + .. code-block:: console + + $ linux/scripts/checkpatch.pl --no-tree --terse -f /path/to/your/file.c + +* sparse + + .. code-block:: console + + $ sudo apt-get install sparse + $ cd linux + $ make C=2 /path/to/your/file.c + +* cppcheck + + .. 
code-block:: console + + $ sudo apt-get install cppcheck + $ cppcheck /path/to/your/file.c + +Penalties +--------- + +Information about assignment penalties can be found on the `General Directions page `__. + +In exceptional cases (the assignment passes the tests by not complying with the requirements) and if the assignment does not pass all the tests, the grade may decrease more than mentioned above. + +## References +We recommend the following readings before starting to work on the homework: +* `KVM host in a few lines of code <https://zserge.com/posts/kvm/>`__ + + +TLDR +---- + +1. The VMM creates and initializes a virtual machine and a virtual CPU +2. We switch to real mode and run the simple guest code from `guest_16_bits` +3. We switch to long mode and run the more complex guest from `guest_32_bits` +4. We implement the SIMVIRTIO protocol. We will describe how it behaves in the following subtasks. +5. The guest writes in the TX queue (queue 0) the ascii code for `R` which will result in a `VMEXIT` +6. The VMM will handle the VMEXIT caused by the previous write in the queue. When it receives the +`R` letter it will initiate the reset procedure of the device and set the device status to `DEVICE_RESET` +7. After the reset handling, the guest must set the status of the device to `DRIVER_ACK`. After this, the guest will write to the TX queue the letter `C` +8. In the VMM we will initialize the config process when letter `C` is received. It will set the device status to `DEVICE_CONFIG` and add a new entry in the device_table +9. After the configuration process is finished, the guest will set the driver status to `DRIVER_OK` +10. Next, the VMM will set the device status to `DEVICE_READY` +11. The guest will write in the TX queue "Ana are mere" and will execute a halt +12. The VMM will print to the STDOUT the message received and execute the halt request +13. 
Finally, the VMM will verify that at address 0x400 and in register RAX is stored the value 42 + + diff --git a/Documentation/teaching/so2/grading.rst b/Documentation/teaching/so2/grading.rst new file mode 100644 index 00000000000000..ab728e12cb4649 --- /dev/null +++ b/Documentation/teaching/so2/grading.rst @@ -0,0 +1,207 @@ +=============================== +SO2 - General Rules and Grading +=============================== + +General Rules +============= + +1. Laboratory +------------- +There is no formal rule for dividing students; everyone can participate in any laboratory as long as the following rules are respected. +Priority for participation is given to students from the respective group (34xC3 or optional). +The limit of students in a laboratory is 14 people. +Starting from the third week, the participation list in the laboratory is "frozen". +Students who have a retake can participate in any laboratory as long as there are available spots. +Like other students, the participation list is "frozen" starting from the third week. +The division is done on the laboratory hours division page. +You can make up for a maximum of 2 laboratories (you can attend another subgroup) (in those laboratories where there are available spots). +Laboratories cannot be made up retroactively. You cannot make up a laboratory from the previous week within the same laboratory week. +Laboratory activities take place only in the laboratory room. +We encourage you to go through the brief and laboratory exercises at home. +You can solve exercises at home, but you will have to start from scratch in the laboratory. + +2. Final deadline for submitting assignments +-------------------------------------------- +The final deadline for submitting SO2 assignments is **Wednesday, May 29, 2024, 23:59.**. +Beyond this date, assignments cannot be submitted anymore. +Please ensure timely submission of assignments with complete information to be graded. 
+We will not accept assignments submitted after this date or assignments not submitted on vmchecker-next. +For the testing part, assignments will receive the score indicated from testing on vmchecker-next; tests failed due to reasons unrelated to vmchecker-next will not be graded. +Assignments cannot be submitted for the special June 2023 exam session. +Assignments can be resubmitted after TODO for the September 2024 exam session. +The deadline for submitting assignments for the Fall 2024 session is TODO. + +3. Assignment Presentations +--------------------------- +The SO2 team reserves the right to request presentations for some homework assignments. +A presentation involves a discussion with at least two assistants about the completion of the assignment, the solution used, and any encountered issues. +The purpose of the assignment presentation sessions is to clarify any uncertainties regarding the completion of the assignment and to verify its correctness. +Individuals who will present an assignment will be contacted at least 24 hours in advance by the laboratory assistant. +Most likely, a 15-minute slot before/after the SO2 class or at the end of the SO2 laboratory session will be used. + +4. Rules on Assignments +------------------------ +The assignments for Operating Systems 2 are individual, except when explicitly stated that an assignment can be solved in a team. +This is because the primary objective of the assignments is for you to acquire or deepen your practical skills. +If the level of collaboration is too high or if you seek solutions online, this objective will not be achieved. +Each assignment is to be completed by a student without consulting the source code of their peers. + +We understand that teamwork is important, but we do not have the environment to carry out team projects in the Operating Systems 2 course. 
+If you encounter any problems in completing an assignment, use the discussion list or ask the laboratory assistants or course instructors. +Our role is to help you solve them. +Feel free to rely on the SO2 team. + +You can discuss among yourselves within the bounds of common sense; that is, you should not dictate a solution to someone, but you can offer a general idea. +If you are the one being asked and providing explanations, please consider redirecting to the discussion list and the SO2 team. +It is not allowed to request the solution to an assignment on a site like StackExchange, Rent a Coder, ChatGPT etc. +You can ask more generic questions, but do not request the solution to the assignment. + +You can freely use code from the laboratory, skeletons provided by us. +You can use external resources (GitHub, open-source code, or others) as long as they do not represent obvious solutions to the assignments, publicly available with or without intention. +See also the next paragraph. + +It is not allowed to publish assignment solutions (even after the end of the course). +If you find assignment solutions on GitHub or elsewhere, report them to the discussion list or privately to the laboratory assistant or course instructor. +We reiterate that if you need clarification that you would address to older colleagues or other forums, StackExchange, or other sources, use the discussion list and the SO2 team. +It is the safest and most honest way to solve problems. + +It is not allowed to transfer files between yourselves. +In general, we recommend not to screen-share with another colleague, whether for inspiration or to help them with their assignment. +Avoid testing an assignment on a colleague's system. +There may be exceptions; you can help someone troubleshoot, but please ensure that it does not transition from "let's solve this problem together" to "let me solve your assignment for you". +However, we recommend using the discussion list or the SO2 team to ask questions. 
+ +5. Penalties for Plagiarized Assignments +---------------------------------------- + +In general, we consider punitive measures as a last resort. +As long as the assignment is completed individually, without problematic source code contribution from external sources, then it is not a plagiarized assignment. + +The notion of a plagiarized assignment refers to, without limitation, situations such as: + + * Two assignments that are similar enough to draw this conclusion; + * Using source code from the internet that is an obvious solution to the assignment; + * Using pieces of code from another colleague; + * Accessing another colleague's code during the assignment; + * Modifying an existing assignment; + * Following another colleague's code; + * Direct assistance in completing the assignment (someone else wrote or dictated the code); + * Someone else wrote the assignment (voluntarily, for payment, or other benefits). + * If two assignments are considered plagiarized, both the source and destination will be penalized equally, without discussions about who plagiarized from whom and whose fault it is. + +.. warning:: + + Plagiarizing an assignment results in the elimination of points for the assignments completed up to that session. + Any assignment submitted until that session receives a score of 0 and cannot be resubmitted during the current academic year. + If there were instances of plagiarized assignments during the semester, it will be possible to obtain points in the summer, for the September session, from assignments **not yet** submitted. + We reiterate that our goal is not and will not be penalization for plagiarism. + We consider cheating to be dishonest behavior that will be punished if it occurs. + However, our goal is to prevent cheating; for this purpose, we offer support and resources from the team in all its forms (discussion list, face-to-face discussions with the SO2 team). 
+ Please use them with confidence; we believe that an honest approach to completing assignments will also result in a gain of knowledge and skills for you. + +6. Retake/Grade Increase +------------------------- + +In the retake/grade increase session in September, only assignments can be submitted, only the final exam can be retaken, or both. +You can continue to submit assignments with the deadlines from the semester, meaning you can achieve a maximum grade of 7 for each assignment. +Assignments are submitted using the vmchecker-next interface. +If you did not have plagiarized assignments during the semester, you can (re)submit any assignments. +If there were instances of plagiarized assignments during the semester, you can submit only assignments not yet submitted during the semester. +The submission deadline is TODO + +If you do not wish to retake the final exam, you can choose not to participate in the exam. +Grades will be recorded in the official catalog, according to the SO2 catalog. + +In the special retake/grade increase session in June, only the final exam can be retaken, and no homework assignments can be submitted. + +The exam in the retake session will consist of 11 equally weighted topics (for a total of 3 points - one topic is a bonus). Passing the exam is conditional on obtaining 1 point out of the 3 points assigned to the course. In practice, this means correctly solving 3 out of the 11 topics in the exam. + +In the case of retaking the final exam, the higher grade will be retained (between the semester grade and the grade from the retake session). + +You can participate in only one exam during a session. + +7. Class Redo +------------------- + +If you prefer, you can keep the score from the previous academic year for the entire semester's activity (labs, assignments, course work), and only retake the final exam. +You cannot keep the score for individual components of the semester (only assignments or only course work). 
+ +If you want to keep the score from the previous academic year for the entire semester's activity, you must announce this at the beginning of the semester. +Otherwise, the score from the previous academic year's semester will be reset according to the default mode. + +By default, the score for the academic year will be reset on October 1. +If you do not graduate from the course during the current academic year, you will need to retake it completely during the next academic year. + +Grading +======= + +You must achieve at least 4.5 points out of 10 to pass. + +1. Lectures (3 points) +---------------------- +* Completion of the course is conditioned by obtaining 30% (3 out of 10) of the course score. +* The lecture score will be obtained from 11 lecture quizzes to be completed before each class (one quiz is a bonus). +* Each course assignment contains a set of 4 questions from the material covered in the previous class (one question is a bonus). + * There will be no final exam. + * Each question is scored with 0 or 1. + * A question is scored only if it is fully and correctly answered. + * A question answered incompletely or one answered completely but with incorrect specifications or errors will not be scored. + * Course assignments cannot be redone. + * Each assignment lasts 3 minutes. + * The score is obtained from the formula min(sum_of_assignment_scores / 10 * 4/3, 10). + * The assignments are closed book. +* For those who cannot attend the course assignments or wish to improve their course score, an assignment will be given at the end of the semester (during the last class) covering all the course material. + * The end-of-semester assignment (last class) consists of 11 questions for the 3 course points and lasts 60 minutes. + * The end-of-semester assignment is open-book. You are allowed to use class notes, books, slides, laptops, or tablets without internet access. + * Access with mobile phones is not permitted. 
Phones must be turned off/silent/deactivated during the exam. + * You may download course materials, labs, or other resources for offline use. + + +2. Laboratory (2 points) +------------------------ +* The laboratories are held in EG106, EG306, and PR706. +* Completion of the laboratory exercises leads to obtaining 10 or 11 points allocated for the laboratory. +* The final grade for the laboratory is calculated using the formula (sum(l1:l12) / 12). + + +3. Assignments (5 points + Extra) +--------------------------------- +* There are 4 Assignments: + * Assignment 0 - "Kernel API" - 0.5 points + * Assignment 1 - "Kprobe based tracer" - 1.5 points + * Assignment 2 - "Driver UART" - 1.5 points + * Assignment 3 - "Software RAID" - 1.5 points +* Extra activities: + * SO2 transport protocol - 2 points + * SO2 Virtual Machine Manager with KVM - 2 points +* In case the total score for assignments + "Extra" activities exceeds 5 points, the following procedure will be followed: + * 5 points are considered as part of the total score. + * The difference between the total score and 5 points will be proportionally adjusted relative to the grade obtained in the lecture. + +.. code-block:: c + + S = A0 + A1 + A2 + A3 + Extra; + if (S <= 5) + assignment_grade = S; + else + assignment_grade = 5 + (S - 5) * course_grade / 3; // 0 <= course_grade <=3 + +* The verification and scoring of assignments: + * Assignments are tested against plagiarism. + * Assignments will be automatically verified using the `vmchecker-next `__ infrastructure integrated with moodle. + * The verification tests are public. + * Students who upload their assignments on Moodle must wait for the checker's feedback in the feedback section of the assignment upload page. + * The grade listed in the feedback section will be the final grade for the assignment. 
+ * There may be exceptional situations where this rule is not considered (for example, if the assignment is implemented solely to pass the tests and does not meet the assignment requirements). + * The verification system deducts points (automatically) for certain situations (segmentation faults, unhandled exceptions, compilation errors, or warnings) regardless of the test results. + * Deductions are specified in the instructions list and in the assignment statement. + * Deductions are subtracted from the assignment grade (maximum of 10) not from the assignment score. + +* Late assignments + * Each assignment has a deadline of 2 weeks from the publication date. (exception! Assignment 0) + * After the deadline, 0.25 points per day (out of 10, the maximum grade for each assignment) will be deducted for 12 days (up to a maximum grade of 7). + * The deduction is from the grade (maximum 10), not from the score. An assignment incurs deductions of 0.25 points per day from the maximum grade (10), regardless of its score. + * For example, if for assignment 3 (scored with 1.5 points) the delay is 4 days, you will receive a deduction of 4 * 0.25 = 1 point from the grade, resulting in a maximum grade of 9, equivalent to a maximum score of 1.35 points. + * After 12 days, no further deductions will be made; a maximum grade of 7 can be obtained for an assignment submitted 13 days after the deadline expiration, or 50 days, or more, including during the retake session. + + diff --git a/Documentation/teaching/so2/index.rst b/Documentation/teaching/so2/index.rst new file mode 100644 index 00000000000000..801882777f7877 --- /dev/null +++ b/Documentation/teaching/so2/index.rst @@ -0,0 +1,57 @@ +=================== +Operating Systems 2 +=================== + +.. toctree:: + :caption: Good To Know + :maxdepth: 1 + + grading.rst + +.. 
toctree:: + :caption: Lectures + :maxdepth: 1 + + lec1-intro.rst + lec2-syscalls.rst + lec3-processes.rst + lec4-interrupts.rst + lec5-smp.rst + lec6-address-space.rst + lec7-memory-management.rst + lec8-filesystems.rst + lec9-debugging.rst + lec10-networking.rst + lec11-arch.rst + lec12-virtualization.rst + +.. toctree:: + :caption: Labs + :maxdepth: 1 + + lab1-intro.rst + lab2-kernel-api.rst + lab3-device-drivers.rst + lab4-interrupts.rst + lab5-deferred-work.rst + lab6-memory-mapping.rst + lab7-block-device-drivers.rst + lab8-filesystems-part1.rst + lab9-filesystems-part2.rst + lab10-networking.rst + lab11-arm-kernel-development.rst + lab12-kernel-profiling.rst + +.. toctree:: + :caption: Assignments + :maxdepth: 1 + + assign-collaboration.rst + assign0-kernel-api.rst + assign1-kprobe-based-tracer.rst + assign2-driver-uart.rst + assign3-software-raid.rst + assign4-transport-protocol.rst + .. uncomment next line for pitix to be available in Docs + .. assign5-pitix.rst + assign7-kvm-vmm.rst diff --git a/Documentation/teaching/so2/lab1-intro.rst b/Documentation/teaching/so2/lab1-intro.rst new file mode 100644 index 00000000000000..461148a975fc07 --- /dev/null +++ b/Documentation/teaching/so2/lab1-intro.rst @@ -0,0 +1,112 @@ +========================= +SO2 Lab 01 - Introduction +========================= + +Lab objectives +============== + +* presenting the rules and objectives of the Operating Systems 2 lab +* introducing the lab documentation +* introducing the Linux kernel and related resources +* creating simple modules +* describing the process of kernel module compilation +* presenting how a module can be used with a kernel +* simple kernel debugging methods + +.. include:: ../labs/introduction.rst + :start-after: [SECTION-ABOUT-BEGIN] + :end-before: [SECTION-ABOUT-END] + +.. include:: ../labs/introduction.rst + :start-after: [SECTION-REFERENCES-BEGIN] + :end-before: [SECTION-REFERENCES-END] + +.. 
include:: ../labs/introduction.rst + :start-after: [SECTION-DOCUMENTATION-BEGIN] + :end-before: [SECTION-DOCUMENTATION-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [SECTION-OVERVIEW-BEGIN] + :end-before: [SECTION-OVERVIEW-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [SECTION-MODULE-EXAMPLE-BEGIN] + :end-before: [SECTION-MODULE-EXAMPLE-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [SECTION-COMPILE-MODULES-BEGIN] + :end-before: [SECTION-COMPILE-MODULES-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [SECTION-LOAD-MODULES-BEGIN] + :end-before: [SECTION-LOAD-MODULES-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [SECTION-DEBUG-MODULES-BEGIN] + :end-before: [SECTION-DEBUG-MODULES-END] + +.. note:: + + If you want to learn how to easily browse through the Linux source code + and how to debug kernel code, read the `Good to know <#good-to-know>`__ + section. + +Exercises +========= + +.. include:: ../labs/introduction.rst + :start-after: [SECTION-EXERCISES-REMARKS-BEGIN] + :end-before: [SECTION-EXERCISES-REMARKS-END] + +.. _exercises_summary: + +.. include:: ../labs/exercises-summary.hrst +.. |LAB_NAME| replace:: kernel_modules + +.. .. include:: ../labs/introduction.rst +.. :start-after: [EXERCISE1-BEGIN] +.. :end-before: [EXERCISE1-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [EXERCISE1-BEGIN] + :end-before: [EXERCISE1-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [EXERCISE2-BEGIN] + :end-before: [EXERCISE2-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [EXERCISE3-BEGIN] + :end-before: [EXERCISE3-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [EXERCISE4-BEGIN] + :end-before: [EXERCISE4-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [EXERCISE5-BEGIN] + :end-before: [EXERCISE5-END] + +.. include:: ../labs/kernel_modules.rst + :start-after: [EXERCISE6-BEGIN] + :end-before: [EXERCISE6-END] + +.. 
include:: ../labs/kernel_modules.rst + :start-after: [EXERCISE7-BEGIN] + :end-before: [EXERCISE7-END] + +.. _good-to-know: + +Good to know +============ + +The following sections contain useful information for getting used to the Linux +kernel code and debugging techniques. + +.. include:: ../labs/introduction.rst + :start-after: [SECTION-CODE-NAVIGATION-BEGIN] + :end-before: [SECTION-CODE-NAVIGATION-END] + +.. include:: ../labs/introduction.rst + :start-after: [SECTION-DEBUGGING-BEGIN] + :end-before: [SECTION-DEBUGGING-END] diff --git a/Documentation/teaching/so2/lab10-networking.rst b/Documentation/teaching/so2/lab10-networking.rst new file mode 100644 index 00000000000000..0f9675cbee4127 --- /dev/null +++ b/Documentation/teaching/so2/lab10-networking.rst @@ -0,0 +1,6 @@ +======================= +SO2 Lab 10 - Networking +======================= + +.. include:: ../labs/networking.rst + :start-line: 4 diff --git a/Documentation/teaching/so2/lab11-arm-kernel-development.rst b/Documentation/teaching/so2/lab11-arm-kernel-development.rst new file mode 100644 index 00000000000000..6f59f8cc0196fc --- /dev/null +++ b/Documentation/teaching/so2/lab11-arm-kernel-development.rst @@ -0,0 +1,6 @@ +====================================== +SO2 Lab 11 - Kernel Development on ARM +====================================== + +.. include:: ../labs/arm_kernel_development.rst + :start-line: 4 diff --git a/Documentation/teaching/so2/lab12-kernel-profiling.rst b/Documentation/teaching/so2/lab12-kernel-profiling.rst new file mode 100644 index 00000000000000..ce8f13580f64e9 --- /dev/null +++ b/Documentation/teaching/so2/lab12-kernel-profiling.rst @@ -0,0 +1,6 @@ +============================= +SO2 Lab 12 - Kernel Profiling +============================= + +.. 
include:: ../labs/kernel_profiling.rst + :start-line: 4 diff --git a/Documentation/teaching/so2/lab2-kernel-api.rst b/Documentation/teaching/so2/lab2-kernel-api.rst new file mode 100644 index 00000000000000..379601db8a1b03 --- /dev/null +++ b/Documentation/teaching/so2/lab2-kernel-api.rst @@ -0,0 +1,6 @@ +======================= +SO2 Lab 02 - Kernel API +======================= + +.. include:: ../labs/kernel_api.rst + :start-line: 4 diff --git a/Documentation/teaching/so2/lab3-device-drivers.rst b/Documentation/teaching/so2/lab3-device-drivers.rst new file mode 100644 index 00000000000000..6a3c77b0afde9b --- /dev/null +++ b/Documentation/teaching/so2/lab3-device-drivers.rst @@ -0,0 +1,6 @@ +===================================== +SO2 Lab 03 - Character device drivers +===================================== + +.. include:: ../labs/device_drivers.rst + :start-line: 4 diff --git a/Documentation/teaching/so2/lab4-interrupts.rst b/Documentation/teaching/so2/lab4-interrupts.rst new file mode 100644 index 00000000000000..5375f6cc3d65b4 --- /dev/null +++ b/Documentation/teaching/so2/lab4-interrupts.rst @@ -0,0 +1,6 @@ +====================================== +SO2 Lab 04 - I/O access and Interrupts +====================================== + +.. include:: ../labs/interrupts.rst + :start-line: 4 diff --git a/Documentation/teaching/so2/lab5-deferred-work.rst b/Documentation/teaching/so2/lab5-deferred-work.rst new file mode 100644 index 00000000000000..bf763d7f33592f --- /dev/null +++ b/Documentation/teaching/so2/lab5-deferred-work.rst @@ -0,0 +1,6 @@ +========================== +SO2 Lab 05 - Deferred work +========================== + +.. 
include:: ../labs/deferred_work.rst + :start-line: 4 diff --git a/Documentation/teaching/so2/lab6-memory-mapping.rst b/Documentation/teaching/so2/lab6-memory-mapping.rst new file mode 100644 index 00000000000000..53bc205e3aafd0 --- /dev/null +++ b/Documentation/teaching/so2/lab6-memory-mapping.rst @@ -0,0 +1,6 @@ +=========================== +SO2 Lab 06 - Memory Mapping +=========================== + +.. include:: ../labs/memory_mapping.rst + :start-line: 4 diff --git a/Documentation/teaching/so2/lab7-block-device-drivers.rst b/Documentation/teaching/so2/lab7-block-device-drivers.rst new file mode 100644 index 00000000000000..2bbad421b7045d --- /dev/null +++ b/Documentation/teaching/so2/lab7-block-device-drivers.rst @@ -0,0 +1,6 @@ +================================= +SO2 Lab 07 - Block Device Drivers +================================= + +.. include:: ../labs/block_device_drivers.rst + :start-line: 4 diff --git a/Documentation/teaching/so2/lab8-filesystems-part1.rst b/Documentation/teaching/so2/lab8-filesystems-part1.rst new file mode 100644 index 00000000000000..d4d8516f0b416d --- /dev/null +++ b/Documentation/teaching/so2/lab8-filesystems-part1.rst @@ -0,0 +1,19 @@ +========================================= +SO2 Lab 08 - File system drivers (Part 1) +========================================= + +.. include:: ../labs/filesystems_part1.rst + :start-line: 4 + :end-before: [SURVEY-LABEL] + +.. important:: + In order to have a better understanding of what we do well and we can do + better, what factors affect your implication in teaching, extracurricular + but also professional activities, we ask you to complete `this survey + `_. The survey is a short one, + having answers with check marks, with an estimated completion time of + 3-5 minutes. Obviously, we will send you the analysis of the survey and + use it to improve the teaching activities. + +.. 
include:: ../labs/filesystems_part1.rst + :start-after: [SURVEY-LABEL] diff --git a/Documentation/teaching/so2/lab9-filesystems-part2.rst b/Documentation/teaching/so2/lab9-filesystems-part2.rst new file mode 100644 index 00000000000000..5ef61803a5b44d --- /dev/null +++ b/Documentation/teaching/so2/lab9-filesystems-part2.rst @@ -0,0 +1,6 @@ +========================================= +SO2 Lab 09 - File system drivers (Part 2) +========================================= + +.. include:: ../labs/filesystems_part2.rst + :start-line: 4 diff --git a/Documentation/teaching/so2/lec1-intro.rst b/Documentation/teaching/so2/lec1-intro.rst new file mode 100644 index 00000000000000..2a011341bbfb2d --- /dev/null +++ b/Documentation/teaching/so2/lec1-intro.rst @@ -0,0 +1,260 @@ +============================================================== +SO2 Lecture 01 - Course overview and Linux kernel introduction +============================================================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 01 - Course overview and Linux kernel introduction + :inline-contents: False + :level: 1 + + +Echipa +====== + +.. slide:: Echipa + :inline-contents: True + :level: 2 + + * Daniel Băluță (Daniel), Răzvan Deaconescu (Răzvan, RD), Claudiu + Ghioc (Claudiu), Valentin Ghiță (Vali), Sergiu Weisz (Sergiu), + Octavian Purdilă (Tavi) + + * Alexandru Militaru (Alex), Teodora Șerbănescu (Teo), Ștefan + Teodorescu (Ștefan, Fane), Mihai Popescu (Mihai, Mișu), + Constantin Răducanu, Daniel Dinca, Laurențiu Ștefan + + * Mult succes în noul semestru! + +Poziționare curs +================ + +.. slide:: Poziționare curs + :inline-contents: True + :level: 2 + + .. ditaa:: + + +---------------------------------------------------------+ + | application programming (EGC, SPG, PP, SPRC, IOC, etc.) 
| + +---------------------------------------------------------+ + + +----------------------------------+ + | system programming (PC, SO, CPL) | + +----------------------------------+ + user space + ----------------------------------------------------------=- + kernel space + +--------------------------+ + | kernel programming (SO2) | + +--------------------------+ + + ----------------------------------------------------------=- + + +----------------------------------+ + | hardware (PM, CN1, CN2, PL ) | + +----------------------------------+ + +Resurse +======= + +.. slide:: Resurse + :inline-contents: True + :level: 2 + + * Linux Kernel Labs: https://linux-kernel-labs.github.io/ + * mailing list: so2@cursuri.cs.pub.ro + * Facebook + * vmchecker + * catalog Google, calendar Google + * LXR: https://elixir.bootlin.com/linux/v5.10.14/source + * cs.curs.pub.ro - rol de portal + * karma awards + +Comunitate +========== + +.. slide:: Comunitate + :inline-contents: True + :level: 2 + + * tutorial contribuții: https://linux-kernel-labs.github.io/refs/heads/master/info/contributing.html + * corecții, ajustări, precizări, informații utile + * listă de discuții + * răspundeți la întrebările colegilor voștri + * propuneți subiecte de discuție care au legătură cu disciplina + * Facebook + * sugestii, propuneri, feedback + * Primiți puncte de karma + +Notare +======= + +.. slide:: Notare + :inline-contents: True + :level: 2 + + * 2 puncte activitate la laborator + * 3 puncte „examen”, notare pe parcurs + * 5 puncte teme de casă + * Activități "extra" + * Punctajul din teme de casă + activitați extra ce depăsește 5 + puncte e corelat direct proportional cu nota de la examen + * Tema 0 - 0,5 puncte + * Temele 1, 2, 3 - câte 1,5 puncte fiecare + * Condiţii de promovare: nota finală 4.5, nota minimă examen 3 + +Obiectivele cursului +==================== + +.. 
slide:: Obiectivele cursului + :inline-contents: True + :level: 2 + + * Prezentarea structurii interne a unui sistem de operare + * Target: sisteme de operare de uz general + * Structura și componentele unui kernel monolitic + * Procese, FS, Networking + * Memory management + * Exemplificare pe Linux + +Obiectivele laboratorului si a temelor +====================================== + +.. slide:: Obiectivele laboratorului si a temelor + :inline-contents: True + :level: 2 + + * Însușirea cunoștințelor necesare implementării de device drivere + + * Înțelegerea în profunzime a cunoștințelor prin rezolvarea de + exerciții + +Cursuri necesare +================ + +.. slide:: Cursuri necesare + :inline-contents: True + :level: 2 + + * Programare: C + * SD: tabele de dispersie, arbori echilibrați + * IOCLA: lucrul cu registre și instrucțiuni de bază (adunări, comparaţii, salturi) + * CN: TLB/CAM, memorie, procesor, I/O + * PC, RL: ethernet, IP, sockeți + * SO: procese, fișiere, thread-uri, memorie virtuală + +Despre curs +=========== + +.. slide:: Despre curs + :inline-contents: True + :level: 2 + + * 12 cursuri + * interactiv + * participaţi la discuţii + * întrebaţi atunci când nu aţi înţeles + * destul de “dens”, se recomandă călduros parcurgerea suportului bibliografic înainte şi după curs + * 1h:20 prezentare + 20min teste si discutii pe marginea testului + +Lista cursuri +============= + +.. slide:: Lista cursuri + :inline-contents: True + :level: 2 + + .. hlist:: + :columns: 2 + + * Introducere + * Apeluri de sistem + * Procese + * Întreruperi + * Sincronizare + * Adresarea memoriei + * Gestiunea memoriei + * Gestiunea fișierelor + * Kernel debugging + * Gestiunea rețelei + * Virtualizare + * Kernel profiling + + +Despre laborator +================ + +.. 
slide:: Despre laborator + :inline-contents: True + :level: 2 + + * Kernel Modules and Device Drivers + * 15 min prezentare / 80 de minute lucru + * se punctează activitatea + * learn by doing + +Despre teme +=========== + +.. slide:: Despre teme + :inline-contents: True + :level: 2 + + * necesare: aprofundare API (laborator) și concepte (curs) + * teste publice + * suport de testare (vmchecker) + * relativ puţin cod de scris dar relativ dificile + * dificultatea constă în acomodarea cu noul mediu + +Lista teme +========== + +.. slide:: Lista teme + :inline-contents: True + :level: 2 + + * Tema 0 - Kernel API + * Kprobe based tracer + * Driver pentru portul serial + * Software RAID + * SO2 Transport Protocol + + +Bibliografie curs +================= + +.. slide:: Bibliografie curs + :inline-contents: True + :level: 2 + + * Linux Kernel Development, 3rd edition, Robert Love, Addison + Wesley, 2010 + + * Understanding the Linux Kernel, 3rd edition, Daniel P. Bovet & + Marco Cesati, O'Reilly 2005 + + * Linux Networking Architecture, Klaus Wehrle, Frank Pahlke, + Hartmut Ritter, Daniel Muller, Marc Bechler, Prentice Hall 2004 + + * Understanding Linux Network Internals, Christian Benvenuti, O'Reilly 2005 + +Bibliografie laborator +====================== + +.. slide:: Bibliografie laborator + :inline-contents: True + :level: 2 + + * Linux Device Drivers, 3rd edition, Alessandro Rubini & Jonathan + Corbet, O'Reilly 2006 + + * Linux Kernel in a Nutshell, Greg Kroah-Hartman, O'Reilly 2005 + + +.. include:: ../lectures/intro.rst + :start-line: 6 diff --git a/Documentation/teaching/so2/lec10-networking.rst b/Documentation/teaching/so2/lec10-networking.rst new file mode 100644 index 00000000000000..aec4b4144c3523 --- /dev/null +++ b/Documentation/teaching/so2/lec10-networking.rst @@ -0,0 +1,16 @@ +=========================== +SO2 Lecture 10 - Networking +=========================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. 
slide:: SO2 Lecture 10 - Networking + :inline-contents: False + :level: 1 + +.. include:: ../lectures/networking.rst + :start-line: 6 diff --git a/Documentation/teaching/so2/lec11-arch.rst b/Documentation/teaching/so2/lec11-arch.rst new file mode 100644 index 00000000000000..fa160b27e8f664 --- /dev/null +++ b/Documentation/teaching/so2/lec11-arch.rst @@ -0,0 +1,17 @@ +=================================== +SO2 Lecture 11 - Architecture Layer +=================================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 11 - Architecture Layer + :inline-contents: False + :level: 1 + +.. include:: ../lectures/arch.rst + :start-line: 6 + diff --git a/Documentation/teaching/so2/lec12-profiling.rst b/Documentation/teaching/so2/lec12-profiling.rst new file mode 100644 index 00000000000000..8a85441410b801 --- /dev/null +++ b/Documentation/teaching/so2/lec12-profiling.rst @@ -0,0 +1,13 @@ +========================== +SO2 Lecture 12 - Profiling +========================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 12 - Profiling + :inline-contents: False + :level: 1 diff --git a/Documentation/teaching/so2/lec12-virtualization.rst b/Documentation/teaching/so2/lec12-virtualization.rst new file mode 100644 index 00000000000000..9ababa1d3dfce5 --- /dev/null +++ b/Documentation/teaching/so2/lec12-virtualization.rst @@ -0,0 +1,16 @@ +=============================== +SO2 Lecture 12 - Virtualization +=============================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 12 - Virtualization + :inline-contents: False + :level: 1 + +.. 
include:: ../lectures/virt.rst + :start-line: 6 diff --git a/Documentation/teaching/so2/lec2-syscalls.rst b/Documentation/teaching/so2/lec2-syscalls.rst new file mode 100644 index 00000000000000..bcf3bb6bc2e40d --- /dev/null +++ b/Documentation/teaching/so2/lec2-syscalls.rst @@ -0,0 +1,16 @@ +============================= +SO2 Lecture 02 - System calls +============================= + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 02 - System calls + :inline-contents: False + :level: 1 + +.. include:: ../lectures/syscalls.rst + :start-line: 6 diff --git a/Documentation/teaching/so2/lec3-processes.rst b/Documentation/teaching/so2/lec3-processes.rst new file mode 100644 index 00000000000000..394e330664d3c7 --- /dev/null +++ b/Documentation/teaching/so2/lec3-processes.rst @@ -0,0 +1,16 @@ +========================== +SO2 Lecture 03 - Processes +========================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 03 - Processes + :inline-contents: False + :level: 1 + +.. include:: ../lectures/processes.rst + :start-line: 6 diff --git a/Documentation/teaching/so2/lec4-interrupts.rst b/Documentation/teaching/so2/lec4-interrupts.rst new file mode 100644 index 00000000000000..3fba6c5bfef13f --- /dev/null +++ b/Documentation/teaching/so2/lec4-interrupts.rst @@ -0,0 +1,16 @@ +============================= +SO2 Lecture 04 - Interrupts +============================= + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 04 - Interrupts + :inline-contents: False + :level: 1 + +.. 
include:: ../lectures/interrupts.rst + :start-line: 6 diff --git a/Documentation/teaching/so2/lec5-smp.rst b/Documentation/teaching/so2/lec5-smp.rst new file mode 100644 index 00000000000000..a0ef6425973b11 --- /dev/null +++ b/Documentation/teaching/so2/lec5-smp.rst @@ -0,0 +1,16 @@ +=========================================== +SO2 Lecture 05 - Symmetric Multi-Processing +=========================================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 05 - Symmetric Multi-Processing + :inline-contents: False + :level: 1 + +.. include:: ../lectures/smp.rst + :start-line: 6 diff --git a/Documentation/teaching/so2/lec6-address-space.rst b/Documentation/teaching/so2/lec6-address-space.rst new file mode 100644 index 00000000000000..b0237be09b7fb8 --- /dev/null +++ b/Documentation/teaching/so2/lec6-address-space.rst @@ -0,0 +1,16 @@ +============================== +SO2 Lecture 06 - Address Space +============================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 06 - Address Space + :inline-contents: False + :level: 1 + +.. include:: ../lectures/address-space.rst + :start-line: 6 diff --git a/Documentation/teaching/so2/lec7-memory-management.rst b/Documentation/teaching/so2/lec7-memory-management.rst new file mode 100644 index 00000000000000..05e2e3b0e7bda0 --- /dev/null +++ b/Documentation/teaching/so2/lec7-memory-management.rst @@ -0,0 +1,16 @@ +================================== +SO2 Lecture 07 - Memory Management +================================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 07 - Memory Management + :inline-contents: False + :level: 1 + +.. 
include:: ../lectures/memory-management.rst + :start-line: 6 diff --git a/Documentation/teaching/so2/lec8-filesystems.rst b/Documentation/teaching/so2/lec8-filesystems.rst new file mode 100644 index 00000000000000..6029d651a4da64 --- /dev/null +++ b/Documentation/teaching/so2/lec8-filesystems.rst @@ -0,0 +1,17 @@ +====================================== +SO2 Lecture 08 - Filesystem Management +====================================== + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 08 - Filesystem Management + :inline-contents: False + :level: 1 + +.. include:: ../lectures/fs.rst + :start-line: 6 + diff --git a/Documentation/teaching/so2/lec9-debugging.rst b/Documentation/teaching/so2/lec9-debugging.rst new file mode 100644 index 00000000000000..02b4ed6d4309ea --- /dev/null +++ b/Documentation/teaching/so2/lec9-debugging.rst @@ -0,0 +1,16 @@ +================================= +SO2 Lecture 09 - Kernel debugging +================================= + +`View slides `_ + +.. slideconf:: + :autoslides: False + :theme: single-level + +.. slide:: SO2 Lecture 09 - Kernel debugging + :inline-contents: False + :level: 1 + +.. include:: ../lectures/debugging.rst + :start-line: 6 diff --git a/Documentation/userspace-api/media/Makefile b/Documentation/userspace-api/media/Makefile index 81a4a1a53bcebc..fe020e0cc6f1e4 100644 --- a/Documentation/userspace-api/media/Makefile +++ b/Documentation/userspace-api/media/Makefile @@ -61,6 +61,7 @@ epub: all xml: all latex: $(IMGPDF) all linkcheck: +slides: all clean: -rm -f $(DOTTGT) $(IMGTGT) ${TARGETS} 2>/dev/null diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e00a66d7237285..4ba0df574eb253 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1264,6 +1264,9 @@ field userspace_addr, which must point at user addressable memory for the entire memory slot size. 
Any object may back this memory, including anonymous memory, ordinary files, and hugetlbfs. +On architectures that support a form of address tagging, userspace_addr must +be an untagged address. + It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr be identical. This allows large pages in the guest to be backed by large pages in the host. @@ -1316,7 +1319,7 @@ documentation when it pops into existence). :Capability: KVM_CAP_ENABLE_CAP_VM :Architectures: all -:Type: vcpu ioctl +:Type: vm ioctl :Parameters: struct kvm_enable_cap (in) :Returns: 0 on success; -1 on error diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst index e29739904e37ea..7f58010ea86afb 100644 --- a/Documentation/x86/topology.rst +++ b/Documentation/x86/topology.rst @@ -41,6 +41,8 @@ Package Packages contain a number of cores plus shared resources, e.g. DRAM controller, shared caches etc. +Modern systems may also use the term 'Die' for package. + AMD nomenclature for package is 'Node'. Package-related topology information in the kernel: @@ -53,11 +55,18 @@ Package-related topology information in the kernel: The number of dies in a package. This information is retrieved via CPUID. + - cpuinfo_x86.cpu_die_id: + + The physical ID of the die. This information is retrieved via CPUID. + - cpuinfo_x86.phys_proc_id: The physical ID of the package. This information is retrieved via CPUID and deduced from the APIC IDs of the cores in the package. + Modern systems use this value for the socket. There may be multiple + packages within a socket. This value may differ from cpu_die_id. + - cpuinfo_x86.logical_proc_id: The logical ID of the package. 
As we do not trust BIOSes to enumerate the diff --git a/Makefile b/Makefile index e30cf02da8b892..8de785b1c2c210 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 0 +SUBLEVEL = 14 EXTRAVERSION = NAME = Kleptomaniac Octopus @@ -263,7 +263,7 @@ clean-targets := %clean mrproper cleandocs no-dot-config-targets := $(clean-targets) \ cscope gtags TAGS tags help% %docs check% coccicheck \ $(version_h) headers headers_% archheaders archscripts \ - %asm-generic kernelversion %src-pkg dt_binding_check \ + %asm-generic kernelversion %src-pkg %slides dt_binding_check \ outputmakefile no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.s %.symtypes %/ @@ -450,7 +450,7 @@ LEX = flex YACC = bison AWK = awk INSTALLKERNEL := installkernel -DEPMOD = /sbin/depmod +DEPMOD = depmod PERL = perl PYTHON = python PYTHON3 = python3 @@ -1663,7 +1663,7 @@ $(help-board-dirs): help-%: # Documentation targets # --------------------------------------------------------------------------- DOC_TARGETS := xmldocs latexdocs pdfdocs htmldocs epubdocs cleandocs \ - linkcheckdocs dochelp refcheckdocs + linkcheckdocs dochelp refcheckdocs slides PHONY += $(DOC_TARGETS) $(DOC_TARGETS): $(Q)$(MAKE) $(build)=Documentation $@ diff --git a/README.rst b/README.rst new file mode 120000 index 00000000000000..67226b289b4cc2 --- /dev/null +++ b/README.rst @@ -0,0 +1 @@ +Documentation/teaching/index.rst \ No newline at end of file diff --git a/arch/Kconfig b/arch/Kconfig index ba4e966484ab57..69fe7133c765d7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -143,6 +143,22 @@ config UPROBES managed by the kernel and kept transparent to the probed application. 
) +config HAVE_64BIT_ALIGNED_ACCESS + def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS + help + Some architectures require 64 bit accesses to be 64 bit + aligned, which also requires structs containing 64 bit values + to be 64 bit aligned too. This includes some 32 bit + architectures which can do 64 bit accesses, as well as 64 bit + architectures without unaligned access. + + This symbol should be selected by an architecture if 64 bit + accesses are required to be 64 bit aligned in this way even + though it is not a 64 bit architecture. + + See Documentation/unaligned-memory-access.txt for more + information on the topic of unaligned memory accesses. + config HAVE_EFFICIENT_UNALIGNED_ACCESS bool help @@ -1037,6 +1053,12 @@ config ARCH_WANT_LD_ORPHAN_WARN by the linker, since the locations of such sections can change between linker versions. +config ARCH_SPLIT_ARG64 + bool + help + If a 32-bit architecture requires 64-bit arguments to be split into + pairs of 32-bit arguments, select this option. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239b6..00000000000000 --- a/arch/alpha/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 0c6bf0d1df7ad1..578bdbbb0fa7fc 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -102,16 +102,22 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -#default target for make without any arguments. 
-KBUILD_IMAGE := $(boot)/bootpImage - -all: bootpImage -bootpImage: vmlinux - -boot_targets += uImage uImage.bin uImage.gz +boot_targets := uImage.bin uImage.gz uImage.lzma +PHONY += $(boot_targets) $(boot_targets): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ +uimage-default-y := uImage.bin +uimage-default-$(CONFIG_KERNEL_GZIP) := uImage.gz +uimage-default-$(CONFIG_KERNEL_LZMA) := uImage.lzma + +PHONY += uImage +uImage: $(uimage-default-y) + @ln -sf $< $(boot)/uImage + @$(kecho) ' Image $(boot)/uImage is ready' + +CLEAN_FILES += $(boot)/uImage + archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index 538b92f4dd2530..3b1f8a69a89ef2 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -targets := vmlinux.bin vmlinux.bin.gz uImage +targets := vmlinux.bin vmlinux.bin.gz # uImage build relies on mkimage being availble on your host for ARC target # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage @@ -13,11 +13,6 @@ LINUX_START_TEXT = $$(readelf -h vmlinux | \ UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE) UIMAGE_ENTRYADDR = $(LINUX_START_TEXT) -suffix-y := bin -suffix-$(CONFIG_KERNEL_GZIP) := gz -suffix-$(CONFIG_KERNEL_LZMA) := lzma - -targets += uImage targets += uImage.bin targets += uImage.gz targets += uImage.lzma @@ -42,7 +37,3 @@ $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE $(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE $(call if_changed,uimage,lzma) - -$(obj)/uImage: $(obj)/uImage.$(suffix-y) - @ln -sf $(notdir $<) $@ - @echo ' Image $@ is ready' diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 81f4edec0c2a93..3c1afa524b9c26 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += user.h diff 
--git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index b0dfed0f12be01..d9c264dc25fcb3 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -10,6 +10,7 @@ #ifndef __ASSEMBLY__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) struct vm_area_struct; diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c index 8452753efebe56..31927d2fe29728 100644 --- a/arch/arm/boot/compressed/atags_to_fdt.c +++ b/arch/arm/boot/compressed/atags_to_fdt.c @@ -15,7 +15,8 @@ static int node_offset(void *fdt, const char *node_path) { int offset = fdt_path_offset(fdt, node_path); if (offset == -FDT_ERR_NOTFOUND) - offset = fdt_add_subnode(fdt, 0, node_path); + /* Add the node to root if not found, dropping the leading '/' */ + offset = fdt_add_subnode(fdt, 0, node_path + 1); return offset; } diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index caa27322a0ab70..3a392983ac079f 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -116,7 +116,7 @@ /* * Debug print of the final appended DTB location */ - .macro dbgadtb, begin, end + .macro dbgadtb, begin, size #ifdef DEBUG kputc #'D' kputc #'T' @@ -129,7 +129,7 @@ kputc #'(' kputc #'0' kputc #'x' - kphex \end, 8 /* End of appended DTB */ + kphex \size, 8 /* Size of appended DTB */ kputc #')' kputc #'\n' #endif diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi index 654648b05c7c2e..aeccedd1257404 100644 --- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi +++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi @@ -266,11 +266,6 @@ reg = <0x11000 0x100>; }; -&i2c1 { - compatible = "marvell,mv78230-i2c", "marvell,mv64xxx-i2c"; - reg = <0x11100 0x100>; -}; - &mpic { reg = <0x20a00 0x2d0>, <0x21070 0x58>; }; diff --git 
a/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts b/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts index 2d44d9ad4e400f..e6ad821a863597 100644 --- a/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts +++ b/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts @@ -82,11 +82,6 @@ status = "okay"; }; -&vuart { - // VUART Host Console - status = "okay"; -}; - &uart1 { // Host Console status = "okay"; diff --git a/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts b/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts index 1deb30ec912cf6..6e9baf3bba5310 100644 --- a/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts +++ b/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts @@ -22,9 +22,9 @@ #size-cells = <1>; ranges; - vga_memory: framebuffer@7f000000 { + vga_memory: framebuffer@9f000000 { no-map; - reg = <0x7f000000 0x01000000>; + reg = <0x9f000000 0x01000000>; /* 16M */ }; }; diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts index 4d070d6ba09f99..e86c22ce6d1232 100644 --- a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts +++ b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts @@ -26,7 +26,7 @@ #size-cells = <1>; ranges; - flash_memory: region@ba000000 { + flash_memory: region@b8000000 { no-map; reg = <0xb8000000 0x4000000>; /* 64M */ }; diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi index b58220a49cbd85..bf97aaad7be9b7 100644 --- a/arch/arm/boot/dts/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed-g6.dtsi @@ -357,7 +357,7 @@ #gpio-cells = <2>; gpio-controller; compatible = "aspeed,ast2600-gpio"; - reg = <0x1e780000 0x800>; + reg = <0x1e780000 0x400>; interrupts = ; gpio-ranges = <&pinctrl 0 0 208>; ngpios = <208>; diff --git a/arch/arm/boot/dts/at91-sam9x60ek.dts b/arch/arm/boot/dts/at91-sam9x60ek.dts index eae28b82c7fd02..73b6b1f89de992 100644 --- a/arch/arm/boot/dts/at91-sam9x60ek.dts +++ b/arch/arm/boot/dts/at91-sam9x60ek.dts @@ -569,11 +569,14 @@ atmel,pins = ; }; }; -}; /* pinctrl */ -&pmc { - 
atmel,osc-bypass; -}; + usb1 { + pinctrl_usb_default: usb_default { + atmel,pins = ; + }; + }; +}; /* pinctrl */ &pwm0 { pinctrl-names = "default"; @@ -684,6 +687,8 @@ atmel,vbus-gpio = <0 &pioD 15 GPIO_ACTIVE_HIGH &pioD 16 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usb_default>; status = "okay"; }; diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts index cf13632edd4446..5179258f92470a 100644 --- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts @@ -242,6 +242,11 @@ atmel,pins = ; /* PE9, conflicts with A9 */ }; + pinctrl_usb_default: usb_default { + atmel,pins = + ; + }; }; }; }; @@ -259,6 +264,8 @@ &pioE 3 GPIO_ACTIVE_LOW &pioE 4 GPIO_ACTIVE_LOW >; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usb_default>; status = "okay"; }; diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index e5974a17374cfe..0b3ad1b580b834 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -134,6 +134,11 @@ atmel,pins = ; }; + pinctrl_usb_default: usb_default { + atmel,pins = + ; + }; pinctrl_key_gpio: key_gpio_0 { atmel,pins = ; @@ -159,6 +164,8 @@ &pioE 11 GPIO_ACTIVE_HIGH &pioE 14 GPIO_ACTIVE_HIGH >; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usb_default>; status = "okay"; }; diff --git a/arch/arm/boot/dts/at91sam9rl.dtsi b/arch/arm/boot/dts/at91sam9rl.dtsi index 5653e70c84b4b9..36a42a9fe19576 100644 --- a/arch/arm/boot/dts/at91sam9rl.dtsi +++ b/arch/arm/boot/dts/at91sam9rl.dtsi @@ -282,23 +282,26 @@ atmel,adc-use-res = "highres"; trigger0 { - trigger-name = "timer-counter-0"; + trigger-name = "external-rising"; trigger-value = <0x1>; + trigger-external; }; + trigger1 { - trigger-name = "timer-counter-1"; - trigger-value = <0x3>; + trigger-name = "external-falling"; + trigger-value = <0x2>; + trigger-external; }; trigger2 { - 
trigger-name = "timer-counter-2"; - trigger-value = <0x5>; + trigger-name = "external-any"; + trigger-value = <0x3>; + trigger-external; }; trigger3 { - trigger-name = "external"; - trigger-value = <0x13>; - trigger-external; + trigger-name = "continuous"; + trigger-value = <0x6>; }; }; diff --git a/arch/arm/boot/dts/exynos5410-odroidxu.dts b/arch/arm/boot/dts/exynos5410-odroidxu.dts index 75b4150c26d725..bd1d8499a108b6 100644 --- a/arch/arm/boot/dts/exynos5410-odroidxu.dts +++ b/arch/arm/boot/dts/exynos5410-odroidxu.dts @@ -327,6 +327,8 @@ regulator-name = "vddq_lcd"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + /* Supplies also GPK and GPJ */ + regulator-always-on; }; ldo8_reg: LDO8 { @@ -637,11 +639,11 @@ }; &usbdrd_dwc3_0 { - dr_mode = "host"; + dr_mode = "peripheral"; }; &usbdrd_dwc3_1 { - dr_mode = "peripheral"; + dr_mode = "host"; }; &usbdrd3_0 { diff --git a/arch/arm/boot/dts/exynos5410-pinctrl.dtsi b/arch/arm/boot/dts/exynos5410-pinctrl.dtsi index e5d0a2a4f64830..d0aa18443a69b4 100644 --- a/arch/arm/boot/dts/exynos5410-pinctrl.dtsi +++ b/arch/arm/boot/dts/exynos5410-pinctrl.dtsi @@ -560,6 +560,34 @@ interrupt-controller; #interrupt-cells = <2>; }; + + usb3_1_oc: usb3-1-oc { + samsung,pins = "gpk2-4", "gpk2-5"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; + + usb3_1_vbusctrl: usb3-1-vbusctrl { + samsung,pins = "gpk2-6", "gpk2-7"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; + + usb3_0_oc: usb3-0-oc { + samsung,pins = "gpk3-0", "gpk3-1"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; + + usb3_0_vbusctrl: usb3-0-vbusctrl { + samsung,pins = "gpk3-2", "gpk3-3"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; }; &pinctrl_2 { diff --git a/arch/arm/boot/dts/exynos5410.dtsi b/arch/arm/boot/dts/exynos5410.dtsi index 60a87684b1af6b..584ce62361b134 100644 --- a/arch/arm/boot/dts/exynos5410.dtsi +++ 
b/arch/arm/boot/dts/exynos5410.dtsi @@ -390,6 +390,8 @@ &usbdrd3_0 { clocks = <&clock CLK_USBD300>; clock-names = "usbdrd30"; + pinctrl-names = "default"; + pinctrl-0 = <&usb3_0_oc>, <&usb3_0_vbusctrl>; }; &usbdrd_phy0 { @@ -401,6 +403,8 @@ &usbdrd3_1 { clocks = <&clock CLK_USBD301>; clock-names = "usbdrd30"; + pinctrl-names = "default"; + pinctrl-0 = <&usb3_1_oc>, <&usb3_1_vbusctrl>; }; &usbdrd_dwc3_1 { diff --git a/arch/arm/boot/dts/imx6q-tbs2910.dts b/arch/arm/boot/dts/imx6q-tbs2910.dts index 861e05d53157e9..343364d3e4f7d3 100644 --- a/arch/arm/boot/dts/imx6q-tbs2910.dts +++ b/arch/arm/boot/dts/imx6q-tbs2910.dts @@ -16,6 +16,13 @@ stdout-path = &uart1; }; + aliases { + mmc0 = &usdhc2; + mmc1 = &usdhc3; + mmc2 = &usdhc4; + /delete-property/ mmc3; + }; + memory@10000000 { device_type = "memory"; reg = <0x10000000 0x80000000>; diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi index 736074f1c3ef94..959d8ac2e393bf 100644 --- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi @@ -418,7 +418,7 @@ /* VDD_AUD_1P8: Audio codec */ reg_aud_1p8v: ldo3 { - regulator-name = "vdd1p8"; + regulator-name = "vdd1p8a"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-boot-on; diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi index 24f793ca288679..92f9977d14822b 100644 --- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi +++ b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi @@ -137,7 +137,7 @@ lcd_backlight: lcd-backlight { compatible = "pwm-backlight"; - pwms = <&pwm4 0 5000000>; + pwms = <&pwm4 0 5000000 0>; pwm-names = "LCD_BKLT_PWM"; brightness-levels = <0 10 20 30 40 50 60 70 80 90 100>; @@ -167,7 +167,7 @@ i2c-gpio,delay-us = <2>; /* ~100 kHz */ #address-cells = <1>; #size-cells = <0>; - status = "disabld"; + status = "disabled"; }; i2c_cam: i2c-gpio-cam { @@ -179,7 +179,7 @@ i2c-gpio,delay-us = <2>; /* ~100 kHz */ 
#address-cells = <1>; #size-cells = <0>; - status = "disabld"; + status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi index b06577808ff4eb..7e4e5fd0143a12 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi @@ -53,7 +53,6 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_microsom_enet_ar8035>; - phy-handle = <&phy>; phy-mode = "rgmii-id"; phy-reset-duration = <2>; phy-reset-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>; @@ -63,10 +62,19 @@ #address-cells = <1>; #size-cells = <0>; - phy: ethernet-phy@0 { + /* + * The PHY can appear at either address 0 or 4 due to the + * configuration (LED) pin not being pulled sufficiently. + */ + ethernet-phy@0 { reg = <0>; qca,clk-out-frequency = <125000000>; }; + + ethernet-phy@4 { + reg = <4>; + qca,clk-out-frequency = <125000000>; + }; }; }; diff --git a/arch/arm/boot/dts/meson8b-odroidc1.dts b/arch/arm/boot/dts/meson8b-odroidc1.dts index 0c26467de4d037..5963566dbcc9d5 100644 --- a/arch/arm/boot/dts/meson8b-odroidc1.dts +++ b/arch/arm/boot/dts/meson8b-odroidc1.dts @@ -224,7 +224,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOH_4 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts index cc498191ddd1df..8f4eb1ed458163 100644 --- a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts +++ b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts @@ -81,7 +81,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOH_4 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index 11d41e86f814d2..7dde9fbb06d33c 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -494,3 +494,11 @@ clock-names = "sysclk"; 
}; }; + +&aes1_target { + status = "disabled"; +}; + +&aes2_target { + status = "disabled"; +}; diff --git a/arch/arm/boot/dts/omap4-panda-es.dts b/arch/arm/boot/dts/omap4-panda-es.dts index cfa85aa3da085e..6afa8fd7c412de 100644 --- a/arch/arm/boot/dts/omap4-panda-es.dts +++ b/arch/arm/boot/dts/omap4-panda-es.dts @@ -46,7 +46,7 @@ button_pins: pinmux_button_pins { pinctrl-single,pins = < - OMAP4_IOPAD(0x11b, PIN_INPUT_PULLUP | MUX_MODE3) /* gpio_113 */ + OMAP4_IOPAD(0x0fc, PIN_INPUT_PULLUP | MUX_MODE3) /* gpio_113 */ >; }; }; diff --git a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi index c4c6c7e9e37b67..5898879a3038e8 100644 --- a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi +++ b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi @@ -45,18 +45,21 @@ emac: gem@30000 { compatible = "cadence,gem"; reg = <0x30000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <31>; }; dmac1: dmac@40000 { compatible = "snps,dw-dmac"; reg = <0x40000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <25>; }; dmac2: dmac@50000 { compatible = "snps,dw-dmac"; reg = <0x50000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <26>; }; @@ -233,6 +236,7 @@ axi2pico@c0000000 { compatible = "picochip,axi2pico-pc3x2"; reg = <0xc0000000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <13 14 15 16 17 18 19 20 21>; }; }; diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 2ddc85dff8ce9e..2c4952427296ef 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -656,6 +656,7 @@ clocks = <&pmc PMC_TYPE_PERIPHERAL 51>; #address-cells = <1>; #size-cells = <1>; + no-memory-wc; ranges = <0 0xf8044000 0x1420>; }; @@ -724,7 +725,7 @@ can0: can@f8054000 { compatible = "bosch,m_can"; - reg = <0xf8054000 0x4000>, <0x210000 0x4000>; + reg = <0xf8054000 0x4000>, <0x210000 0x1c00>; reg-names = "m_can", "message_ram"; interrupts = <56 IRQ_TYPE_LEVEL_HIGH 7>, <64 IRQ_TYPE_LEVEL_HIGH 7>; @@ -1130,7 +1131,7 @@ can1: can@fc050000 
{ compatible = "bosch,m_can"; - reg = <0xfc050000 0x4000>, <0x210000 0x4000>; + reg = <0xfc050000 0x4000>, <0x210000 0x3800>; reg-names = "m_can", "message_ram"; interrupts = <57 IRQ_TYPE_LEVEL_HIGH 7>, <65 IRQ_TYPE_LEVEL_HIGH 7>; @@ -1140,7 +1141,7 @@ assigned-clocks = <&pmc PMC_TYPE_GCK 57>; assigned-clock-parents = <&pmc PMC_TYPE_CORE PMC_UTMI>; assigned-clock-rates = <40000000>; - bosch,mram-cfg = <0x1100 0 0 64 0 0 32 32>; + bosch,mram-cfg = <0x1c00 0 0 64 0 0 32 32>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/ste-db8500.dtsi b/arch/arm/boot/dts/ste-db8500.dtsi index d309fad32229d9..344d29853bf76f 100644 --- a/arch/arm/boot/dts/ste-db8500.dtsi +++ b/arch/arm/boot/dts/ste-db8500.dtsi @@ -12,4 +12,42 @@ 200000 0>; }; }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + /* Modem trace memory */ + ram@06000000 { + reg = <0x06000000 0x00f00000>; + no-map; + }; + + /* Modem shared memory */ + ram@06f00000 { + reg = <0x06f00000 0x00100000>; + no-map; + }; + + /* Modem private memory */ + ram@07000000 { + reg = <0x07000000 0x01000000>; + no-map; + }; + + /* + * Initial Secure Software ISSW memory + * + * This is probably only used if the kernel tries + * to actually call into trustzone to run secure + * applications, which the mainline kernel probably + * will not do on this old chipset. But you can never + * be too careful, so reserve this memory anyway. 
+ */ + ram@17f00000 { + reg = <0x17f00000 0x00100000>; + no-map; + }; + }; }; diff --git a/arch/arm/boot/dts/ste-db8520.dtsi b/arch/arm/boot/dts/ste-db8520.dtsi index 48bd8728ae27fd..287804e9e18367 100644 --- a/arch/arm/boot/dts/ste-db8520.dtsi +++ b/arch/arm/boot/dts/ste-db8520.dtsi @@ -12,4 +12,42 @@ 200000 0>; }; }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + /* Modem trace memory */ + ram@06000000 { + reg = <0x06000000 0x00f00000>; + no-map; + }; + + /* Modem shared memory */ + ram@06f00000 { + reg = <0x06f00000 0x00100000>; + no-map; + }; + + /* Modem private memory */ + ram@07000000 { + reg = <0x07000000 0x01000000>; + no-map; + }; + + /* + * Initial Secure Software ISSW memory + * + * This is probably only used if the kernel tries + * to actually call into trustzone to run secure + * applications, which the mainline kernel probably + * will not do on this old chipset. But you can never + * be too careful, so reserve this memory anyway. + */ + ram@17f00000 { + reg = <0x17f00000 0x00100000>; + no-map; + }; + }; }; diff --git a/arch/arm/boot/dts/ste-db9500.dtsi b/arch/arm/boot/dts/ste-db9500.dtsi new file mode 100644 index 00000000000000..0afff703191c67 --- /dev/null +++ b/arch/arm/boot/dts/ste-db9500.dtsi @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "ste-dbx5x0.dtsi" + +/ { + cpus { + cpu@300 { + /* cpufreq controls */ + operating-points = <1152000 0 + 800000 0 + 400000 0 + 200000 0>; + }; + }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + /* + * Initial Secure Software ISSW memory + * + * This is probably only used if the kernel tries + * to actually call into trustzone to run secure + * applications, which the mainline kernel probably + * will not do on this old chipset. But you can never + * be too careful, so reserve this memory anyway. 
+ */ + ram@17f00000 { + reg = <0x17f00000 0x00100000>; + no-map; + }; + }; +}; diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index be90e73c923ec2..27d8a07718a001 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -4,7 +4,7 @@ */ /dts-v1/; -#include "ste-db8500.dtsi" +#include "ste-db9500.dtsi" #include "ste-href-ab8500.dtsi" #include "ste-href-family-pinctrl.dtsi" diff --git a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts index a1093cb37dc7ad..aed1f2d5f2467e 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts @@ -326,6 +326,7 @@ panel@0 { compatible = "samsung,s6e63m0"; reg = <0>; + max-brightness = <15>; vdd3-supply = <&panel_reg_3v0>; vci-supply = <&panel_reg_1v8>; reset-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>; diff --git a/arch/arm/boot/dts/tegra20-ventana.dts b/arch/arm/boot/dts/tegra20-ventana.dts index b158771ac0b7da..055334ae3d2889 100644 --- a/arch/arm/boot/dts/tegra20-ventana.dts +++ b/arch/arm/boot/dts/tegra20-ventana.dts @@ -3,6 +3,7 @@ #include #include "tegra20.dtsi" +#include "tegra20-cpu-opp.dtsi" / { model = "NVIDIA Tegra20 Ventana evaluation board"; @@ -592,6 +593,16 @@ #clock-cells = <0>; }; + cpus { + cpu0: cpu@0 { + operating-points-v2 = <&cpu0_opp_table>; + }; + + cpu@1 { + operating-points-v2 = <&cpu0_opp_table>; + }; + }; + gpio-keys { compatible = "gpio-keys"; diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 4d1707388d9419..312428d83eedb2 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -386,20 +386,32 @@ ENTRY(ce_aes_ctr_encrypt) .Lctrloop4x: subs r4, r4, #4 bmi .Lctr1x - add r6, r6, #1 + + /* + * NOTE: the sequence below has been carefully tweaked to avoid + * a silicon erratum that exists in Cortex-A57 (#1742098) and + * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs + * may 
produce an incorrect result if they take their input from a + * register of which a single 32-bit lane has been updated the last + * time it was modified. To work around this, the lanes of registers + * q0-q3 below are not manipulated individually, and the different + * counter values are prepared by successive manipulations of q7. + */ + add ip, r6, #1 vmov q0, q7 + rev ip, ip + add lr, r6, #2 + vmov s31, ip @ set lane 3 of q1 via q7 + add ip, r6, #3 + rev lr, lr vmov q1, q7 - rev ip, r6 - add r6, r6, #1 + vmov s31, lr @ set lane 3 of q2 via q7 + rev ip, ip vmov q2, q7 - vmov s7, ip - rev ip, r6 - add r6, r6, #1 + vmov s31, ip @ set lane 3 of q3 via q7 + add r6, r6, #4 vmov q3, q7 - vmov s11, ip - rev ip, r6 - add r6, r6, #1 - vmov s15, ip + vld1.8 {q4-q5}, [r1]! vld1.8 {q6}, [r1]! vld1.8 {q15}, [r1]! diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index bda8bf17631e19..f70af1d0514b9a 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -19,7 +19,7 @@ MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("ecb(aes)"); -MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("cbc(aes)-all"); MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)"); @@ -191,7 +191,8 @@ static int cbc_init(struct crypto_skcipher *tfm) struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); unsigned int reqsize; - ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); + ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->enc_tfm)) return PTR_ERR(ctx->enc_tfm); @@ -441,7 +442,8 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_flags = CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_NEED_FALLBACK, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = 
AES_MAX_KEY_SIZE, diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 383635b68763c5..f1398b9267c081 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += early_ioremap.h generic-y += extable.h generic-y += flat.h -generic-y += local64.h generic-y += parport.h generic-y += seccomp.h diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 55a47df0477383..1c9e6d1452c5b2 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -252,31 +252,10 @@ __und_svc: #else svc_entry #endif - @ - @ call emulation code, which returns using r9 if it has emulated - @ the instruction, or the more conventional lr if we are to treat - @ this as a real undefined instruction - @ - @ r0 - instruction - @ -#ifndef CONFIG_THUMB2_KERNEL - ldr r0, [r4, #-4] -#else - mov r1, #2 - ldrh r0, [r4, #-2] @ Thumb instruction at LR - 2 - cmp r0, #0xe800 @ 32-bit instruction if xx >= 0 - blo __und_svc_fault - ldrh r9, [r4] @ bottom 16 bits - add r4, r4, #2 - str r4, [sp, #S_PC] - orr r0, r9, r0, lsl #16 -#endif - badr r9, __und_svc_finish - mov r2, r4 - bl call_fpe mov r1, #4 @ PC correction to apply -__und_svc_fault: + THUMB( tst r5, #PSR_T_BIT ) @ exception taken in Thumb mode? 
+ THUMB( movne r1, #2 ) @ if so, fix up PC correction mov r0, sp @ struct pt_regs *regs bl __und_fault diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index f8904227e7fdc4..98c1e68bdfcbb9 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -671,12 +671,8 @@ ARM_BE8(rev16 ip, ip) ldrcc r7, [r4], #4 @ use branch for delay slot bcc 1b bx lr -#else -#ifdef CONFIG_CPU_ENDIAN_BE8 - moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction #else moveq r0, #0x400000 @ set bit 22, mov to mvn instruction -#endif b 2f 1: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 @@ -685,7 +681,7 @@ ARM_BE8(rev16 ip, ip) tst ip, #0x000f0000 @ check the rotation field orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 + orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 #else bic ip, ip, #0x000000ff tst ip, #0xf00 @ check the rotation field diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index 1eabf2d2834be7..e06f946b75b96a 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -67,6 +67,7 @@ #define MX6Q_CCM_CCR 0x0 .align 3 + .arm .macro sync_l2_cache diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index fc7bb2ca16727d..64b23b0cd23c7a 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -230,10 +230,12 @@ static int _omap_device_notifier_call(struct notifier_block *nb, break; case BUS_NOTIFY_BIND_DRIVER: od = to_omap_device(pdev); - if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED) && - pm_runtime_status_suspended(dev)) { + if (od) { od->_driver_status = BUS_NOTIFY_BIND_DRIVER; - pm_runtime_set_active(dev); + if (od->_state == OMAP_DEVICE_STATE_ENABLED && + pm_runtime_status_suspended(dev)) { + pm_runtime_set_active(dev); + } } break; case BUS_NOTIFY_ADD_DEVICE: diff --git a/arch/arm/mach-omap2/pmic-cpcap.c 
b/arch/arm/mach-omap2/pmic-cpcap.c index eab281a5fc9f72..09076ad0576d98 100644 --- a/arch/arm/mach-omap2/pmic-cpcap.c +++ b/arch/arm/mach-omap2/pmic-cpcap.c @@ -71,7 +71,7 @@ static struct omap_voltdm_pmic omap_cpcap_iva = { .vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN, .vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX, .vddmin = 900000, - .vddmax = 1350000, + .vddmax = 1375000, .vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US, .i2c_slave_addr = 0x44, .volt_reg_addr = 0x0, diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 65e4482e384981..02692fbe2db5c5 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -743,6 +743,7 @@ config SWP_EMULATE config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on ARCH_SUPPORTS_BIG_ENDIAN + depends on !LD_IS_LLD help Say Y if you plan on running a kernel in big-endian mode. Note that your board must be properly built and your board diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S index 0186cf9da890bc..27b0a1f27fbdf3 100644 --- a/arch/arm/vfp/entry.S +++ b/arch/arm/vfp/entry.S @@ -37,20 +37,3 @@ ENDPROC(vfp_null_entry) .align 2 .LCvfp: .word vfp_vector - -@ This code is called if the VFP does not exist. It needs to flag the -@ failure to the VFP initialisation code. - - __INIT -ENTRY(vfp_testing_entry) - dec_preempt_count_ti r10, r4 - ldr r0, VFP_arch_address - str r0, [r0] @ set to non-zero value - ret r9 @ we have handled the fault -ENDPROC(vfp_testing_entry) - - .align 2 -VFP_arch_address: - .word VFP_arch - - __FINIT diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index 4fcff9f59947d6..d5837bf05a9a53 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -79,11 +79,6 @@ ENTRY(vfp_support_entry) DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 .fpu vfpv2 - ldr r3, [sp, #S_PSR] @ Neither lazy restore nor FP exceptions - and r3, r3, #MODE_MASK @ are supported in kernel mode - teq r3, #USR_MODE - bne vfp_kmode_exception @ Returns through lr - VFPFMRX r1, FPEXC @ Is the VFP enabled? 
DBGSTR1 "fpexc %08x", r1 tst r1, #FPEXC_EN diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 8c9e7f9f0277d9..2cb355c1b5b716 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "vfpinstr.h" @@ -31,7 +32,6 @@ /* * Our undef handlers (in entry.S) */ -asmlinkage void vfp_testing_entry(void); asmlinkage void vfp_support_entry(void); asmlinkage void vfp_null_entry(void); @@ -42,7 +42,7 @@ asmlinkage void (*vfp_vector)(void) = vfp_null_entry; * Used in startup: set to non-zero if VFP checks fail * After startup, holds VFP architecture */ -unsigned int VFP_arch; +static unsigned int __initdata VFP_arch; /* * The pointer to the vfpstate structure of the thread which currently @@ -436,7 +436,7 @@ static void vfp_enable(void *unused) * present on all CPUs within a SMP complex. Needs to be called prior to * vfp_init(). */ -void vfp_disable(void) +void __init vfp_disable(void) { if (VFP_arch) { pr_debug("%s: should be called prior to vfp_init\n", __func__); @@ -642,7 +642,9 @@ static int vfp_starting_cpu(unsigned int unused) return 0; } -void vfp_kmode_exception(void) +#ifdef CONFIG_KERNEL_MODE_NEON + +static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr) { /* * If we reach this point, a floating point exception has been raised @@ -660,9 +662,51 @@ void vfp_kmode_exception(void) pr_crit("BUG: unsupported FP instruction in kernel mode\n"); else pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n"); + pr_crit("FPEXC == 0x%08x\n", fmrx(FPEXC)); + return 1; } -#ifdef CONFIG_KERNEL_MODE_NEON +static struct undef_hook vfp_kmode_exception_hook[] = {{ + .instr_mask = 0xfe000000, + .instr_val = 0xf2000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xff100000, + .instr_val = 0xf4000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE, + .fn = 
vfp_kmode_exception, +}, { + .instr_mask = 0xef000000, + .instr_val = 0xef000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE | PSR_T_BIT, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xff100000, + .instr_val = 0xf9000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE | PSR_T_BIT, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}}; + +static int __init vfp_kmode_exception_hook_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++) + register_undef_hook(&vfp_kmode_exception_hook[i]); + return 0; +} +subsys_initcall(vfp_kmode_exception_hook_init); /* * Kernel-side NEON support functions @@ -708,6 +752,21 @@ EXPORT_SYMBOL(kernel_neon_end); #endif /* CONFIG_KERNEL_MODE_NEON */ +static int __init vfp_detect(struct pt_regs *regs, unsigned int instr) +{ + VFP_arch = UINT_MAX; /* mark as not present */ + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook vfp_detect_hook __initdata = { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = vfp_detect, +}; + /* * VFP support code initialisation. */ @@ -728,10 +787,11 @@ static int __init vfp_init(void) * The handler is already setup to just log calls, so * we just need to read the VFPSID register. 
*/ - vfp_vector = vfp_testing_entry; + register_undef_hook(&vfp_detect_hook); barrier(); vfpsid = fmrx(FPSID); barrier(); + unregister_undef_hook(&vfp_detect_hook); vfp_vector = vfp_null_entry; pr_info("VFP support v0.3: "); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 60e901cd0de6a3..5a957a9a09843c 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -371,7 +371,7 @@ static int __init xen_guest_init(void) } gnttab_init(); if (!xen_initial_domain()) - xenbus_probe(NULL); + xenbus_probe(); /* * Making sure board specific code will not set up ops for diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 6a87d592bd001f..485b7dbd4f9e32 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=--no-undefined -X -z norelro +LDFLAGS_vmlinux :=--no-undefined -X ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -110,16 +110,20 @@ KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ # Prefer the baremetal ELF build target, but not all toolchains include # it so fall back to the standard linux version if needed. -KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb) +KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb -z norelro) UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ # Same as above, prefer ELF but fall back to linux target if needed. 
-KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux) +KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux -z norelro) UTS_MACHINE := aarch64 endif +ifeq ($(CONFIG_LD_IS_LLD), y) +KBUILD_LDFLAGS += -z norelro +endif + CHECKFLAGS += -D__aarch64__ ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts index 1b07c8c06eac58..463a72d6bb7c7b 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts @@ -340,7 +340,7 @@ eee-broken-1000t; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_15 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi index 6982632ae6461a..39a09661c5f62f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi @@ -413,7 +413,7 @@ max-speed = <1000>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_15 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi index 2802ddbb83ac78..feb08850474000 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi @@ -264,7 +264,7 @@ max-speed = <1000>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_15 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi index 9b8548e5f6e511..ee8fcae9f9f00a 100644 --- 
a/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi @@ -135,3 +135,7 @@ }; }; }; + +&mali { + dma-coherent; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index 7be3e354093bfa..de27beafe9db94 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -165,7 +165,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index 70fcfb7b0683d4..50de1d01e5655e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -200,7 +200,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi index 222ee8069cfaa4..9b0b81f191f1fc 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi @@ -126,7 +126,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi index ad812854a107f4..a350fee1264d7b 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi @@ -147,7 +147,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; 
reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-p230.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-p230.dts index b08c4537f260db..b2ab05c2209031 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-p230.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-p230.dts @@ -82,7 +82,7 @@ /* External PHY reset is shared with internal PHY Led signal */ reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts index bff8ec2c1c70ce..62d3e04299b67e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts @@ -194,7 +194,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; @@ -341,7 +341,7 @@ #size-cells = <1>; compatible = "winbond,w25q16", "jedec,spi-nor"; reg = <0>; - spi-max-frequency = <3000000>; + spi-max-frequency = <104000000>; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts index 83eca3af44ce7b..dfa7a37a1281f8 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts @@ -112,7 +112,7 @@ max-speed = <1000>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts index ea45ae0c71b7f1..8edbfe040805c5 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts @@ 
-64,7 +64,7 @@ /* External PHY reset is shared with internal PHY Led signal */ reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts index c89c9f846fb10f..dde7cfe12cffa1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts @@ -114,7 +114,7 @@ max-speed = <1000>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi index 71317f5aada1d7..c309517abae32f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi @@ -130,7 +130,7 @@ opp-microvolt = <790000>; }; - opp-1512000000 { + opp-1500000000 { opp-hz = /bits/ 64 <1500000000>; opp-microvolt = <800000>; }; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi index aef8f2b00778d7..5401a646c8406f 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi @@ -4,11 +4,16 @@ */ usb { compatible = "simple-bus"; - dma-ranges; #address-cells = <2>; #size-cells = <2>; ranges = <0x0 0x0 0x0 0x68500000 0x0 0x00400000>; + /* + * Internally, USB bus to the interconnect can only address up + * to 40-bit + */ + dma-ranges = <0 0 0 0 0x100 0x0>; + usbphy0: usb-phy@0 { compatible = "brcm,sr-usb-combo-phy"; reg = <0x0 0x00000000 0x0 0x100>; diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi index b9ed6a33e2901a..7599e1a00ff516 100644 --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi +++ 
b/arch/arm64/boot/dts/exynos/exynos7.dtsi @@ -79,8 +79,10 @@ }; psci { - compatible = "arm,psci-0.2"; + compatible = "arm,psci"; method = "smc"; + cpu_off = <0x84000002>; + cpu_on = <0xC4000003>; }; soc: soc@0 { @@ -481,13 +483,6 @@ pmu_system_controller: system-controller@105c0000 { compatible = "samsung,exynos7-pmu", "syscon"; reg = <0x105c0000 0x5000>; - - reboot: syscon-reboot { - compatible = "syscon-reboot"; - regmap = <&pmu_system_controller>; - offset = <0x0400>; - mask = <0x1>; - }; }; rtc: rtc@10590000 { @@ -687,3 +682,4 @@ }; #include "exynos7-pinctrl.dtsi" +#include "arm/exynos-syscon-restart.dtsi" diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts index 8161dd23797122..b3fa4dbeebd527 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts @@ -155,20 +155,10 @@ }; partition@210000 { - reg = <0x210000 0x0f0000>; + reg = <0x210000 0x1d0000>; label = "bootloader"; }; - partition@300000 { - reg = <0x300000 0x040000>; - label = "DP firmware"; - }; - - partition@340000 { - reg = <0x340000 0x0a0000>; - label = "trusted firmware"; - }; - partition@3e0000 { reg = <0x3e0000 0x020000>; label = "bootloader environment"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 7a6fb7e1fb82f5..62f4dcb96e70dd 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -93,7 +93,7 @@ reboot { compatible ="syscon-reboot"; regmap = <&rst>; - offset = <0xb0>; + offset = <0>; mask = <0x02>; }; @@ -309,7 +309,7 @@ <0x0 0x20000000 0x0 0x10000000>; reg-names = "fspi_base", "fspi_mmap"; interrupts = ; - clocks = <&clockgen 4 3>, <&clockgen 4 3>; + clocks = <&clockgen 2 0>, <&clockgen 2 0>; clock-names = "fspi_en", "fspi"; status = "disabled"; }; @@ -934,7 +934,7 @@ ethernet@0,4 { compatible = 
"fsl,enetc-ptp"; reg = <0x000400 0 0 0 0>; - clocks = <&clockgen 4 0>; + clocks = <&clockgen 2 3>; little-endian; fsl,extts-fifo; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 6038f66aefc10f..03ef0e5f909e47 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -259,7 +259,7 @@ #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; - gpio-ranges = <&iomuxc 0 56 26>, <&iomuxc 0 144 4>; + gpio-ranges = <&iomuxc 0 56 26>, <&iomuxc 26 144 4>; }; gpio4: gpio@30230000 { diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index f3a678e0fd99bb..bf76ebe4637949 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -146,7 +146,7 @@ pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii-id"; - phy = <&phy1>; + phy-handle = <&phy1>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-7040.dtsi b/arch/arm64/boot/dts/marvell/armada-7040.dtsi index 7a3198cd7a0717..2f440711d21d20 100644 --- a/arch/arm64/boot/dts/marvell/armada-7040.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-7040.dtsi @@ -15,10 +15,6 @@ "marvell,armada-ap806"; }; -&smmu { - status = "okay"; -}; - &cp0_pcie0 { iommu-map = <0x0 &smmu 0x480 0x20>, diff --git a/arch/arm64/boot/dts/marvell/armada-8040.dtsi b/arch/arm64/boot/dts/marvell/armada-8040.dtsi index 79e8ce59baa885..22c2d6ebf38187 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-8040.dtsi @@ -15,10 +15,6 @@ "marvell,armada-ap806"; }; -&smmu { - status = "okay"; -}; - &cp0_pcie0 { iommu-map = <0x0 &smmu 0x480 0x20>, diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 9cfd961c45eb39..08a914d3a64352 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ 
b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -363,7 +363,7 @@ compatible = "mediatek,mt8183-gce"; reg = <0 0x10238000 0 0x4000>; interrupts = ; - #mbox-cells = <3>; + #mbox-cells = <2>; clocks = <&infracfg CLK_INFRA_GCE>; clock-names = "gce"; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 93438d2b946962..6946fb210e4848 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -378,7 +378,7 @@ nvidia,schmitt = ; nvidia,lpdr = ; nvidia,enable-input = ; - nvidia,io-high-voltage = ; + nvidia,io-hv = ; nvidia,tristate = ; nvidia,pull = ; }; @@ -390,7 +390,7 @@ nvidia,schmitt = ; nvidia,lpdr = ; nvidia,enable-input = ; - nvidia,io-high-voltage = ; + nvidia,io-hv = ; nvidia,tristate = ; nvidia,pull = ; }; diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index 59e0cbfa221430..cdc1e3d60c58ee 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -156,8 +156,8 @@ no-map; }; - tz: tz@48500000 { - reg = <0x0 0x48500000 0x0 0x00200000>; + tz: memory@4a600000 { + reg = <0x0 0x4a600000 0x0 0x00400000>; no-map; }; @@ -167,7 +167,7 @@ }; q6_region: memory@4ab00000 { - reg = <0x0 0x4ab00000 0x0 0x02800000>; + reg = <0x0 0x4ab00000 0x0 0x05500000>; no-map; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi b/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi index b18d21e42f596c..f7ac4c4033db69 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi @@ -78,6 +78,9 @@ sda-gpios = <&msmgpio 105 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>; scl-gpios = <&msmgpio 106 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>; + pinctrl-names = "default"; + pinctrl-0 = <&muic_i2c_default>; + #address-cells = <1>; #size-cells = <0>; @@ -314,6 +317,14 @@ }; }; + muic_i2c_default: muic-i2c-default { + pins = "gpio105", 
"gpio106"; + function = "gpio"; + + drive-strength = <2>; + bias-disable; + }; + muic_int_default: muic-int-default { pins = "gpio12"; function = "gpio"; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 6678f1e8e3958c..c71f3afc1cc9f9 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1394,7 +1394,8 @@ ipa: ipa@1e40000 { compatible = "qcom,sc7180-ipa"; - iommus = <&apps_smmu 0x440 0x3>; + iommus = <&apps_smmu 0x440 0x0>, + <&apps_smmu 0x442 0x0>; reg = <0 0x1e40000 0 0x7000>, <0 0x1e47000 0 0x2000>, <0 0x1e04000 0 0x2c000>; @@ -2811,7 +2812,7 @@ interrupt-controller; #interrupt-cells = <1>; - interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>; + interconnects = <&mmss_noc MASTER_MDP0 0 &mc_virt SLAVE_EBI1 0>; interconnect-names = "mdp0-mem"; iommus = <&apps_smmu 0x800 0x2>; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 40e8c11f23ab0f..f97f354af86f4e 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -2141,7 +2141,8 @@ ipa: ipa@1e40000 { compatible = "qcom,sdm845-ipa"; - iommus = <&apps_smmu 0x720 0x3>; + iommus = <&apps_smmu 0x720 0x0>, + <&apps_smmu 0x722 0x0>; reg = <0 0x1e40000 0 0x7000>, <0 0x1e47000 0 0x2000>, <0 0x1e04000 0 0x2c000>; diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index d03ca31907466b..76a8c996d497f0 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -264,23 +264,28 @@ status = "okay"; clock-frequency = <400000>; - hid@15 { + tsel: hid@15 { compatible = "hid-over-i2c"; reg = <0x15>; hid-descr-addr = <0x1>; - interrupts-extended = <&tlmm 37 IRQ_TYPE_EDGE_RISING>; + interrupts-extended = <&tlmm 37 IRQ_TYPE_LEVEL_HIGH>; + + pinctrl-names = "default"; + pinctrl-0 = <&i2c3_hid_active>; }; - hid@2c { + tsc2: 
hid@2c { compatible = "hid-over-i2c"; reg = <0x2c>; hid-descr-addr = <0x20>; - interrupts-extended = <&tlmm 37 IRQ_TYPE_EDGE_RISING>; + interrupts-extended = <&tlmm 37 IRQ_TYPE_LEVEL_HIGH>; pinctrl-names = "default"; - pinctrl-0 = <&i2c2_hid_active>; + pinctrl-0 = <&i2c3_hid_active>; + + status = "disabled"; }; }; @@ -288,15 +293,15 @@ status = "okay"; clock-frequency = <400000>; - hid@10 { + tsc1: hid@10 { compatible = "hid-over-i2c"; reg = <0x10>; hid-descr-addr = <0x1>; - interrupts-extended = <&tlmm 125 IRQ_TYPE_EDGE_FALLING>; + interrupts-extended = <&tlmm 125 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; - pinctrl-0 = <&i2c6_hid_active>; + pinctrl-0 = <&i2c5_hid_active>; }; }; @@ -304,7 +309,7 @@ status = "okay"; clock-frequency = <400000>; - hid@5c { + ecsh: hid@5c { compatible = "hid-over-i2c"; reg = <0x5c>; hid-descr-addr = <0x1>; @@ -312,7 +317,7 @@ interrupts-extended = <&tlmm 92 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; - pinctrl-0 = <&i2c12_hid_active>; + pinctrl-0 = <&i2c11_hid_active>; }; }; @@ -426,8 +431,8 @@ &tlmm { gpio-reserved-ranges = <0 4>, <81 4>; - i2c2_hid_active: i2c2-hid-active { - pins = <37>; + i2c3_hid_active: i2c2-hid-active { + pins = "gpio37"; function = "gpio"; input-enable; @@ -435,8 +440,8 @@ drive-strength = <2>; }; - i2c6_hid_active: i2c6-hid-active { - pins = <125>; + i2c5_hid_active: i2c5-hid-active { + pins = "gpio125"; function = "gpio"; input-enable; @@ -444,8 +449,8 @@ drive-strength = <2>; }; - i2c12_hid_active: i2c12-hid-active { - pins = <92>; + i2c11_hid_active: i2c11-hid-active { + pins = "gpio92"; function = "gpio"; input-enable; @@ -454,7 +459,7 @@ }; wcd_intr_default: wcd_intr_default { - pins = <54>; + pins = "gpio54"; function = "gpio"; input-enable; diff --git a/arch/arm64/boot/dts/qcom/sm8250-mtp.dts b/arch/arm64/boot/dts/qcom/sm8250-mtp.dts index fd194ed7fbc86c..98675e1f8204f7 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-mtp.dts +++ b/arch/arm64/boot/dts/qcom/sm8250-mtp.dts @@ -14,7 +14,7 @@ / { 
model = "Qualcomm Technologies, Inc. SM8250 MTP"; - compatible = "qcom,sm8250-mtp"; + compatible = "qcom,sm8250-mtp", "qcom,sm8250"; aliases { serial0 = &uart12; diff --git a/arch/arm64/boot/dts/renesas/cat875.dtsi b/arch/arm64/boot/dts/renesas/cat875.dtsi index 33daa957068400..801ea54b027c43 100644 --- a/arch/arm64/boot/dts/renesas/cat875.dtsi +++ b/arch/arm64/boot/dts/renesas/cat875.dtsi @@ -21,7 +21,6 @@ status = "okay"; phy0: ethernet-phy@0 { - rxc-skew-ps = <1500>; reg = <0>; interrupt-parent = <&gpio2>; interrupts = <21 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi index 178401a34cbf8d..b9e46aed533628 100644 --- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi +++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi @@ -23,7 +23,6 @@ status = "okay"; phy0: ethernet-phy@0 { - rxc-skew-ps = <1500>; reg = <0>; interrupt-parent = <&gpio2>; interrupts = <11 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts index b70ffb1c6a6302..b76282e704de18 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts @@ -334,6 +334,7 @@ }; &usb20_otg { + dr_mode = "host"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index bbdb19a3e85d1a..db0d5c8e5f96ab 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -1237,8 +1237,8 @@ uart0 { uart0_xfer: uart0-xfer { - rockchip,pins = <1 RK_PB1 1 &pcfg_pull_up>, - <1 RK_PB0 1 &pcfg_pull_none>; + rockchip,pins = <1 RK_PB1 1 &pcfg_pull_none>, + <1 RK_PB0 1 &pcfg_pull_up>; }; uart0_cts: uart0-cts { @@ -1256,8 +1256,8 @@ uart1 { uart1_xfer: uart1-xfer { - rockchip,pins = <3 RK_PA4 4 &pcfg_pull_up>, - <3 RK_PA6 4 &pcfg_pull_none>; + rockchip,pins = <3 RK_PA4 4 &pcfg_pull_none>, + <3 RK_PA6 4 &pcfg_pull_up>; }; 
uart1_cts: uart1-cts { @@ -1275,15 +1275,15 @@ uart2-0 { uart2m0_xfer: uart2m0-xfer { - rockchip,pins = <1 RK_PA0 2 &pcfg_pull_up>, - <1 RK_PA1 2 &pcfg_pull_none>; + rockchip,pins = <1 RK_PA0 2 &pcfg_pull_none>, + <1 RK_PA1 2 &pcfg_pull_up>; }; }; uart2-1 { uart2m1_xfer: uart2m1-xfer { - rockchip,pins = <2 RK_PA0 1 &pcfg_pull_up>, - <2 RK_PA1 1 &pcfg_pull_none>; + rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>, + <2 RK_PA1 1 &pcfg_pull_up>; }; }; diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index 533525229a8db0..b9662205be9bf9 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -834,7 +834,7 @@ }; }; - dss: dss@04a00000 { + dss: dss@4a00000 { compatible = "ti,am65x-dss"; reg = <0x0 0x04a00000 0x0 0x1000>, /* common */ <0x0 0x04a02000 0x0 0x1000>, /* vidl1 */ @@ -867,6 +867,8 @@ status = "disabled"; + dma-coherent; + dss_ports: ports { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index e2a96b2c423c40..c66ded9079be41 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -1278,7 +1278,7 @@ }; }; - dss: dss@04a00000 { + dss: dss@4a00000 { compatible = "ti,j721e-dss"; reg = <0x00 0x04a00000 0x00 0x10000>, /* common_m */ diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl index 6e5576d19af8fc..cbc980fb02e335 100644 --- a/arch/arm64/crypto/poly1305-armv8.pl +++ b/arch/arm64/crypto/poly1305-armv8.pl @@ -840,7 +840,6 @@ ldp d14,d15,[sp,#64] addp $ACC2,$ACC2,$ACC2 ldr x30,[sp,#8] - .inst 0xd50323bf // autiasp //////////////////////////////////////////////////////////////// // lazy reduction, but without narrowing @@ -882,6 +881,7 @@ str x4,[$ctx,#8] // set is_base2_26 ldr x29,[sp],#80 + .inst 0xd50323bf // autiasp ret .size poly1305_blocks_neon,.-poly1305_blocks_neon diff --git 
a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped index 8d1c4e420ccdc7..fb2822abf63aa7 100644 --- a/arch/arm64/crypto/poly1305-core.S_shipped +++ b/arch/arm64/crypto/poly1305-core.S_shipped @@ -779,7 +779,6 @@ poly1305_blocks_neon: ldp d14,d15,[sp,#64] addp v21.2d,v21.2d,v21.2d ldr x30,[sp,#8] - .inst 0xd50323bf // autiasp //////////////////////////////////////////////////////////////// // lazy reduction, but without narrowing @@ -821,6 +820,7 @@ poly1305_blocks_neon: str x4,[x0,#8] // set is_base2_26 ldr x29,[sp],#80 + .inst 0xd50323bf // autiasp ret .size poly1305_blocks_neon,.-poly1305_blocks_neon diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index ff9cbb6312128a..07ac208edc8944 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += early_ioremap.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += qrwlock.h generic-y += qspinlock.h diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 015ddffaf6caa3..b56a4b2bc24864 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -17,7 +17,7 @@ #include #define ATOMIC_OP(op) \ -static inline void arch_##op(int i, atomic_t *v) \ +static __always_inline void arch_##op(int i, atomic_t *v) \ { \ __lse_ll_sc_body(op, i, v); \ } @@ -32,7 +32,7 @@ ATOMIC_OP(atomic_sub) #undef ATOMIC_OP #define ATOMIC_FETCH_OP(name, op) \ -static inline int arch_##op##name(int i, atomic_t *v) \ +static __always_inline int arch_##op##name(int i, atomic_t *v) \ { \ return __lse_ll_sc_body(op##name, i, v); \ } @@ -56,7 +56,7 @@ ATOMIC_FETCH_OPS(atomic_sub_return) #undef ATOMIC_FETCH_OPS #define ATOMIC64_OP(op) \ -static inline void arch_##op(long i, atomic64_t *v) \ +static __always_inline void arch_##op(long i, atomic64_t *v) \ { \ __lse_ll_sc_body(op, i, v); \ } @@ -71,7 +71,7 @@ ATOMIC64_OP(atomic64_sub) #undef 
ATOMIC64_OP #define ATOMIC64_FETCH_OP(name, op) \ -static inline long arch_##op##name(long i, atomic64_t *v) \ +static __always_inline long arch_##op##name(long i, atomic64_t *v) \ { \ return __lse_ll_sc_body(op##name, i, v); \ } @@ -94,7 +94,7 @@ ATOMIC64_FETCH_OPS(atomic64_sub_return) #undef ATOMIC64_FETCH_OP #undef ATOMIC64_FETCH_OPS -static inline long arch_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) { return __lse_ll_sc_body(atomic64_dec_if_positive, v); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0cd9f0f75c135f..cc060c41adaab6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -214,6 +214,7 @@ enum vcpu_sysreg { #define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ #define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ #define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ +#define c2_TTBCR2 (c2_TTBCR + 1) /* Translation Table Base Control R. 2 */ #define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ #define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ #define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index cd61239bae8c25..75c8e9a350cc7b 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -238,11 +238,11 @@ static inline const void *__tag_set(const void *addr, u8 tag) /* - * The linear kernel range starts at the bottom of the virtual address - * space. Testing the top bit for the start of the region is a - * sufficient check and avoids having to worry about the tag. + * Check whether an arbitrary address is within the linear map, which + * lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the + * kernel's TTBR1 address range. 
*/ -#define __is_lm_address(addr) (!(((u64)addr) & BIT(vabits_actual - 1))) +#define __is_lm_address(addr) (((u64)(addr) ^ PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) #define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) #define __kimg_to_phys(addr) ((addr) - kimage_voffset) @@ -323,7 +323,7 @@ static inline void *phys_to_virt(phys_addr_t x) #endif /* !CONFIG_SPARSEMEM_VMEMMAP || CONFIG_DEBUG_VIRTUAL */ #define virt_addr_valid(addr) ({ \ - __typeof__(addr) __addr = addr; \ + __typeof__(addr) __addr = __tag_reset(addr); \ __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr)); \ }) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index fce8cbecd6bc72..a884d777398959 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -96,7 +96,8 @@ #endif /* CONFIG_ARM64_FORCE_52BIT */ extern phys_addr_t arm64_dma_phys_limit; -#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) +extern phys_addr_t arm64_dma32_phys_limit; +#define ARCH_LOW_ADDRESS_LIMIT ((arm64_dma_phys_limit ? 
: arm64_dma32_phys_limit) - 1) struct debug_info { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6f36c4f62f6945..0a52e076153bbf 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2552,7 +2552,7 @@ static void verify_hyp_capabilities(void) int parange, ipa_max; unsigned int safe_vmid_bits, vmid_bits; - if (!IS_ENABLED(CONFIG_KVM) || !IS_ENABLED(CONFIG_KVM_ARM_HOST)) + if (!IS_ENABLED(CONFIG_KVM)) return; safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 52a0638ed967b1..ef15c8a2a49dcc 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -189,7 +189,8 @@ long get_mte_ctrl(struct task_struct *task) switch (task->thread.sctlr_tcf0) { case SCTLR_EL1_TCF0_NONE: - return PR_MTE_TCF_NONE; + ret |= PR_MTE_TCF_NONE; + break; case SCTLR_EL1_TCF0_SYNC: ret |= PR_MTE_TCF_SYNC; break; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index a8184cad889075..50852992752b04 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -914,13 +914,6 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { - /* - * The assembly code enters us with IRQs off, but it hasn't - * informed the tracing code of that for efficiency reasons. - * Update the trace code with the current status. 
- */ - trace_hardirqs_off(); - do { /* Check valid user FS if needed */ addr_limit_user_check(); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index f8f758e4a3064e..6fa8cfb8232aae 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -165,15 +165,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { local_daif_mask(); flags = current_thread_info()->flags; - if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) { - /* - * We're off to userspace, where interrupts are - * always enabled after we restore the flags from - * the SPSR. - */ - trace_hardirqs_on(); + if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) return; - } local_daif_restore(DAIF_PROCCTX); } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 2ed5ef8f274b14..2dd164bb1c5a91 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -788,7 +788,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) { unsigned long *bmap = vcpu->kvm->arch.pmu_filter; u64 val, mask = 0; - int base, i; + int base, i, nr_events; if (!pmceid1) { val = read_sysreg(pmceid0_el0); @@ -801,13 +801,17 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) if (!bmap) return val; + nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1; + for (i = 0; i < 32; i += 8) { u64 byte; byte = bitmap_get_value8(bmap, base + i); mask |= byte << i; - byte = bitmap_get_value8(bmap, 0x4000 + base + i); - mask |= byte << (32 + i); + if (nr_events >= (0x4000 + base + 32)) { + byte = bitmap_get_value8(bmap, 0x4000 + base + i); + mask |= byte << (32 + i); + } } return val & mask; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c1fac9836af1af..b246a4acba416c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -663,6 +663,10 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 
pmcr, val; + /* No PMU available, PMCR_EL0 may UNDEF... */ + if (!kvm_arm_support_pmu_v3()) + return; + pmcr = read_sysreg(pmcr_el0); /* * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN @@ -1987,6 +1991,7 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, c2_TTBCR2 }, { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 095540667f0fdd..00576a960f11f8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -60,7 +60,7 @@ EXPORT_SYMBOL(memstart_addr); * bit addressable memory area. 
*/ phys_addr_t arm64_dma_phys_limit __ro_after_init; -static phys_addr_t arm64_dma32_phys_limit __ro_after_init; +phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c index 67a9ba9eaa96bd..cde44c13dda1bc 100644 --- a/arch/arm64/mm/physaddr.c +++ b/arch/arm64/mm/physaddr.c @@ -9,7 +9,7 @@ phys_addr_t __virt_to_phys(unsigned long x) { - WARN(!__is_lm_address(x), + WARN(!__is_lm_address(__tag_reset(x)), "virt_to_phys used for non-linear address: %pK (%pS)\n", (void *)x, (void *)x); diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild index 64876e59e2ef9f..2a5a4d94fafadf 100644 --- a/arch/csky/include/asm/Kbuild +++ b/arch/csky/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += asm-offsets.h generic-y += gpio.h generic-y += kvm_para.h -generic-y += local64.h generic-y += qrwlock.h generic-y += seccomp.h generic-y += user.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index ddf04f32b54675..60ee7f0d60a8ff 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += asm-offsets.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += spinlock.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 373964bb177e41..3ece3c93fe086e 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -2,5 +2,4 @@ generic-y += extable.h generic-y += iomap.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239b6..00000000000000 --- a/arch/ia64/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index ef12e097f31845..27ca549ff47ed1 100644 --- a/arch/ia64/mm/init.c 
+++ b/arch/ia64/mm/init.c @@ -536,7 +536,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), - args->nid, args->zone, page_to_pfn(map_start), + args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end), MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); return 0; } @@ -546,7 +546,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { if (!vmem_map) { - memmap_init_zone(size, nid, zone, start_pfn, + memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); } else { struct page *start; diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 1bff55aa2d54e2..0dbf9c5c6faeb3 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -2,6 +2,5 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += spinlock.h diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 0ac53d87493c88..2bea1799b8de74 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -777,16 +777,12 @@ static struct resource scc_b_rsrcs[] = { struct platform_device scc_a_pdev = { .name = "scc", .id = 0, - .num_resources = ARRAY_SIZE(scc_a_rsrcs), - .resource = scc_a_rsrcs, }; EXPORT_SYMBOL(scc_a_pdev); struct platform_device scc_b_pdev = { .name = "scc", .id = 1, - .num_resources = ARRAY_SIZE(scc_b_rsrcs), - .resource = scc_b_rsrcs, }; EXPORT_SYMBOL(scc_b_pdev); @@ -813,10 +809,15 @@ static void __init mac_identify(void) /* Set up serial port resources for the console initcall. 
*/ - scc_a_rsrcs[0].start = (resource_size_t) mac_bi_data.sccbase + 2; - scc_a_rsrcs[0].end = scc_a_rsrcs[0].start; - scc_b_rsrcs[0].start = (resource_size_t) mac_bi_data.sccbase; - scc_b_rsrcs[0].end = scc_b_rsrcs[0].start; + scc_a_rsrcs[0].start = (resource_size_t)mac_bi_data.sccbase + 2; + scc_a_rsrcs[0].end = scc_a_rsrcs[0].start; + scc_a_pdev.num_resources = ARRAY_SIZE(scc_a_rsrcs); + scc_a_pdev.resource = scc_a_rsrcs; + + scc_b_rsrcs[0].start = (resource_size_t)mac_bi_data.sccbase; + scc_b_rsrcs[0].end = scc_b_rsrcs[0].start; + scc_b_pdev.num_resources = ARRAY_SIZE(scc_b_rsrcs); + scc_b_pdev.resource = scc_b_rsrcs; switch (macintosh_config->scc_type) { case MAC_SCC_PSC: diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 63bce836b9f10f..29b0e557aa7c5b 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -2,7 +2,6 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += syscalls.h diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig index 6889f74e06f541..490bb6da74b7e6 100644 --- a/arch/mips/bcm47xx/Kconfig +++ b/arch/mips/bcm47xx/Kconfig @@ -27,6 +27,7 @@ config BCM47XX_BCMA select BCMA select BCMA_HOST_SOC select BCMA_DRIVER_MIPS + select BCMA_DRIVER_PCI if PCI select BCMA_DRIVER_PCI_HOSTMODE if PCI select BCMA_DRIVER_GPIO default y diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index c61c641674e6b4..e3946b06e840a6 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -13,6 +13,7 @@ #include #include +#include /* * These two variables specify the free mem region @@ -117,7 +118,7 @@ void decompress_kernel(unsigned long boot_heap_start) dtb_size = fdt_totalsize((void *)&__appended_dtb); /* last four bytes is always image size in little endian */ - image_size = 
le32_to_cpup((void *)&__image_end - 4); + image_size = get_unaligned_le32((void *)&__image_end - 4); /* copy dtb to where the booted kernel will expect it */ memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size, diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 198b3bafdac978..95b4fa7bd0d1fd 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -6,7 +6,6 @@ generated-y += syscall_table_64_n64.h generated-y += syscall_table_64_o32.h generic-y += export.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += qrwlock.h diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c index 6ee3f7218c675b..c4441416e96b6a 100644 --- a/arch/mips/kernel/binfmt_elfn32.c +++ b/arch/mips/kernel/binfmt_elfn32.c @@ -103,4 +103,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value) #undef ns_to_kernel_old_timeval #define ns_to_kernel_old_timeval ns_to_old_timeval32 +/* + * Some data types as stored in coredump. + */ +#define user_long_t compat_long_t +#define user_siginfo_t compat_siginfo_t +#define copy_siginfo_to_external copy_siginfo_to_external32 + #include "../../../fs/binfmt_elf.c" diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index 6dd103d3cebba2..7b2a23f48c1ac2 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -106,4 +106,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value) #undef ns_to_kernel_old_timeval #define ns_to_kernel_old_timeval ns_to_old_timeval32 +/* + * Some data types as stored in coredump. 
+ */ +#define user_long_t compat_long_t +#define user_siginfo_t compat_siginfo_t +#define copy_siginfo_to_external copy_siginfo_to_external32 + #include "../../../fs/binfmt_elf.c" diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index 3d80a51256de6a..dab8febb574195 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -187,8 +187,14 @@ static int __init relocate_exception_table(long offset) static inline __init unsigned long rotate_xor(unsigned long hash, const void *area, size_t size) { - size_t i; - unsigned long *ptr = (unsigned long *)area; + const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash)); + size_t diff, i; + + diff = (void *)ptr - area; + if (unlikely(size < diff + sizeof(hash))) + return hash; + + size = ALIGN_DOWN(size - diff, sizeof(hash)); for (i = 0; i < size / sizeof(hash); i++) { /* Rotate by odd number of bits and XOR. */ diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index ca579deef93916..9d11f68a9e8bb4 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -498,8 +498,8 @@ static void __init request_crashkernel(struct resource *res) static void __init check_kernel_sections_mem(void) { - phys_addr_t start = PFN_PHYS(PFN_DOWN(__pa_symbol(&_text))); - phys_addr_t size = PFN_PHYS(PFN_UP(__pa_symbol(&_end))) - start; + phys_addr_t start = __pa_symbol(&_text); + phys_addr_t size = __pa_symbol(&_end) - start; if (!memblock_is_region_memory(start, size)) { pr_info("Kernel sections are not in the memory maps\n"); diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c index 09d5deea747f2f..f80a67c092b63f 100644 --- a/arch/mips/lib/uncached.c +++ b/arch/mips/lib/uncached.c @@ -37,10 +37,12 @@ */ unsigned long run_uncached(void *func) { - register long sp __asm__("$sp"); register long ret __asm__("$2"); long lfunc = (long)func, ufunc; long usp; + long sp; + + __asm__("move %0, $sp" : "=r" (sp)); if (sp >= (long)CKSEG0 && sp < (long)CKSEG2) usp = CKSEG1ADDR(sp); 
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 9cede7ce37e666..c9644c38ec28f8 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1609,7 +1609,7 @@ static void __init loongson2_sc_init(void) c->options |= MIPS_CPU_INCLUSIVE_CACHES; } -static void __init loongson3_sc_init(void) +static void loongson3_sc_init(void) { struct cpuinfo_mips *c = &current_cpu_data; unsigned int config2, lsize; diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index dd0a5becaabd8e..06ec304ad4d16e 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -146,7 +146,7 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) return 1; } -static int __init mips_sc_probe_cm3(void) +static int mips_sc_probe_cm3(void) { struct cpuinfo_mips *c = &current_cpu_data; unsigned long cfg = read_gcr_l2_config(); @@ -180,7 +180,7 @@ static int __init mips_sc_probe_cm3(void) return 0; } -static inline int __init mips_sc_probe(void) +static inline int mips_sc_probe(void) { struct cpuinfo_mips *c = &current_cpu_data; unsigned int config1, config2; diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index ff1e94299317dd..82a4453c9c2d52 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -4,6 +4,5 @@ generic-y += cmpxchg.h generic-y += export.h generic-y += gpio.h generic-y += kvm_para.h -generic-y += local64.h generic-y += parport.h generic-y += user.h diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b234e8154cbd4d..04dc17d52ac2d2 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -202,9 +202,8 @@ config PREFETCH depends on PA8X00 || PA7200 config MLONGCALLS - bool "Enable the -mlong-calls compiler option for big kernels" - default y if !MODULES || UBSAN || FTRACE - default n + def_bool y if !MODULES || UBSAN || FTRACE + bool "Enable the -mlong-calls compiler option for big kernels" if MODULES && !UBSAN && !FTRACE depends on PA8X00 help If you configure the kernel to include many drivers
built-in instead diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index e3ee5c0bfe80fe..a1bd2adc63e3a8 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -3,7 +3,6 @@ generated-y += syscall_table_32.h generated-y += syscall_table_64.h generated-y += syscall_table_c32.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += seccomp.h generic-y += user.h diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index f6f28e41bb5e08..5d8123eb38ec59 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -997,10 +997,17 @@ intr_do_preempt: bb,<,n %r20, 31 - PSW_SM_I, intr_restore nop + /* ssm PSW_SM_I done later in intr_restore */ +#ifdef CONFIG_MLONGCALLS + ldil L%intr_restore, %r2 + load32 preempt_schedule_irq, %r1 + bv %r0(%r1) + ldo R%intr_restore(%r2), %r2 +#else + ldil L%intr_restore, %r1 BL preempt_schedule_irq, %r2 - nop - - b,n intr_restore /* ssm PSW_SM_I done by intr_restore */ + ldo R%intr_restore(%r1), %r2 +#endif #endif /* CONFIG_PREEMPTION */ /* diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index f8ce6d2dde7b1f..e4b364b5da9e7a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -368,6 +368,8 @@ initrd-y := $(filter-out $(image-y), $(initrd-y)) targets += $(image-y) $(initrd-y) targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \ $(patsubst $(x).%, dts/%.dtb, $(filter $(x).%, $(image-y)))) +targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \ + $(patsubst $(x).%, dts/fsl/%.dtb, $(filter $(x).%, $(image-y)))) $(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 90cd5c53af6664..e1f9b4ea1c537b 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -5,7 +5,6 @@ generated-y += syscall_table_c32.h generated-y += syscall_table_spu.h 
generic-y += export.h generic-y += kvm_types.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += qrwlock.h generic-y += vtime.h diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 4a4d3afd534065..299ab33505a6c8 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -216,15 +216,34 @@ static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr) */ static inline int fls(unsigned int x) { - return 32 - __builtin_clz(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 32 - __builtin_clz(x) : 0; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (x)); + return 32 - lz; } #include +/* + * 64-bit can do this using one cntlzd (count leading zeroes doubleword) + * instruction; for 32-bit we use the generic version, which does two + * 32-bit fls calls. + */ +#ifdef CONFIG_PPC64 static inline int fls64(__u64 x) { - return 64 - __builtin_clzll(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 
64 - __builtin_clzll(x) : 0; + asm("cntlzd %0,%1" : "=r" (lz) : "r" (x)); + return 64 - lz; } +#else +#include +#endif #ifdef CONFIG_PPC64 unsigned int __arch_hweight8(unsigned int w); diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 2e277ca0170fbb..a8982d52f6b1d6 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -94,6 +94,7 @@ typedef struct { } mm_context_t; void update_bats(void); +static inline void cleanup_cpu_mmu_context(void) { }; /* patch sites */ extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2; diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 1376be95e975f4..523d3e6e240091 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -524,9 +524,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_val(*ptep) & _PAGE_HASHPTE) flush_hash_entry(mm, ptep, addr); __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index a116fe9317892e..3bdd74739cb88e 100644 --- a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -68,6 +68,7 @@ extern void cpm_reset(void); #define PROFF_SPI ((uint)0x0180) #define PROFF_SCC3 ((uint)0x0200) #define PROFF_SMC1 ((uint)0x0280) +#define PROFF_DSP1 ((uint)0x02c0) #define PROFF_SCC4 ((uint)0x0300) #define PROFF_SMC2 ((uint)0x0380) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 3d2f94afc13ae7..398eba39987901 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -369,7 +369,7 @@ static inline void cpu_feature_keys_init(void) { } 
CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_MAYBE_CAN_NAP) + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NOEXECUTE) #define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON | CPU_FTR_NOEXECUTE) @@ -409,7 +409,6 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT) -#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ #define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \ @@ -520,8 +519,6 @@ enum { CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_82XX | CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | CPU_FTRS_CLASSIC32 | -#else - CPU_FTRS_GENERIC_32 | #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX | @@ -596,8 +593,6 @@ enum { CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_82XX & CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & CPU_FTRS_CLASSIC32 & -#else - CPU_FTRS_GENERIC_32 & #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX & diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 1d32b174ab6aec..c1a8aac01cf91d 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -63,6 +63,12 @@ nop; \ nop; +#define SCV_ENTRY_FLUSH_SLOT \ + SCV_ENTRY_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; + /* * r10 must be free to use, r13 must be paca */ @@ -70,6 +76,13 @@ STF_ENTRY_BARRIER_SLOT; \ ENTRY_FLUSH_SLOT +/* + * r10, ctr must be free to use, r13 must be paca + */ +#define SCV_INTERRUPT_TO_KERNEL \ + STF_ENTRY_BARRIER_SLOT; \ + SCV_ENTRY_FLUSH_SLOT + /* * Macros for annotating the expected destination of (h)rfid * diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 
fbd406cd6916c9..8d100059e266ca 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -221,6 +221,14 @@ label##3: \ FTR_ENTRY_OFFSET 957b-958b; \ .popsection; +#define SCV_ENTRY_FLUSH_FIXUP_SECTION \ +957: \ + .pushsection __scv_entry_flush_fixup,"a"; \ + .align 2; \ +958: \ + FTR_ENTRY_OFFSET 957b-958b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -254,10 +262,12 @@ label##3: \ extern long stf_barrier_fallback; extern long entry_flush_fallback; +extern long scv_entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; +extern long __start___scv_entry_flush_fixup, __stop___scv_entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 6277e7596ae58f..ac75f4ab0dba1b 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -192,9 +192,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, */ if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) { __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); return; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index bf0bf1b900d212..fe2ef598e2ead0 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -173,6 +173,9 @@ 
KCOV_INSTRUMENT_cputable.o := n KCOV_INSTRUMENT_setup_64.o := n KCOV_INSTRUMENT_paca.o := n +CFLAGS_setup_64.o += -fno-stack-protector +CFLAGS_paca.o += -fno-stack-protector + extra-$(CONFIG_PPC_FPU) += fpu.o extra-$(CONFIG_ALTIVEC) += vector.o extra-$(CONFIG_PPC64) += entry_64.o diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2f3846192ec7df..2831b0aa92b15e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -75,7 +75,7 @@ BEGIN_FTR_SECTION bne .Ltabort_syscall END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif - INTERRUPT_TO_KERNEL + SCV_INTERRUPT_TO_KERNEL mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4d01f09ecf8081..3cde2fbd74fce5 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2993,6 +2993,25 @@ TRAMP_REAL_BEGIN(entry_flush_fallback) ld r11,PACA_EXRFI+EX_R11(r13) blr +/* + * The SCV entry flush happens with interrupts enabled, so it must disable + * to prevent EXRFI being clobbered by NMIs (e.g., soft_nmi_common). r10 + * (containing LR) does not need to be preserved here because scv entry + * puts 0 in the pt_regs, CTR can be clobbered for the same reason. + */ +TRAMP_REAL_BEGIN(scv_entry_flush_fallback) + li r10,0 + mtmsrd r10,1 + lbz r10,PACAIRQHAPPENED(r13) + ori r10,r10,PACA_IRQ_HARD_DIS + stb r10,PACAIRQHAPPENED(r13) + std r11,PACA_EXRFI+EX_R11(r13) + L1D_DISPLACEMENT_FLUSH + ld r11,PACA_EXRFI+EX_R11(r13) + li r10,MSR_RI + mtmsrd r10,1 + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 7c767765071dac..c88e66adecb52b 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -131,18 +131,28 @@ #ifdef CONFIG_VMAP_STACK mfspr r11, SPRN_SRR0 mtctr r11 -#endif andi. 
r11, r9, MSR_PR - lwz r11,TASK_STACK-THREAD(r12) + mr r11, r1 + lwz r1,TASK_STACK-THREAD(r12) beq- 99f - addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -#ifdef CONFIG_VMAP_STACK + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ mtmsr r10 isync + tovirt(r12, r12) + stw r11,GPR1(r1) + stw r11,0(r1) + mr r11, r1 +#else + andi. r11, r9, MSR_PR + lwz r11,TASK_STACK-THREAD(r12) + beq- 99f + addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE + tophys(r11, r11) + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1, r11) /* set new kernel sp */ #endif - tovirt_vmstack r12, r12 - tophys_novmstack r11, r11 mflr r10 stw r10, _LINK(r11) #ifdef CONFIG_VMAP_STACK @@ -150,9 +160,6 @@ #else mfspr r10,SPRN_SRR0 #endif - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt_novmstack r1, r11 /* set new kernel sp */ stw r10,_NIP(r11) mfcr r10 rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1510b2a56669fe..2d6581db0c7b6f 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -417,6 +417,10 @@ generic_secondary_common_init: /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) ld r23,0(r23) @@ -445,10 +449,6 @@ generic_secondary_common_init: sync /* order paca.run and cur_cpu_spec */ isync /* In case code patching happened */ - /* Create a temp kernel stack for use before relocation is on. 
*/ - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD - b __secondary_start #endif /* SMP */ @@ -990,7 +990,7 @@ start_here_common: bl start_kernel /* Not reached */ - trap +0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 .previous diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index a0dda2a1f2df0a..d66da35f2e8d36 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -262,10 +262,19 @@ __secondary_hold_acknowledge: MachineCheck: EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_CHRP +#ifdef CONFIG_VMAP_STACK + mr r11, r1 + mfspr r1, SPRN_SPRG_THREAD + lwz r1, RTAS_SP(r1) + cmpwi cr1, r1, 0 + bne cr1, 7f + mr r1, r11 +#else mfspr r11, SPRN_SPRG_THREAD lwz r11, RTAS_SP(r11) cmpwi cr1, r11, 0 bne cr1, 7f +#endif #endif /* CONFIG_PPC_CHRP */ EXCEPTION_PROLOG_1 for_rtas=1 7: EXCEPTION_PROLOG_2 diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 7d0f7682d01df6..cc7a6271b6b4ec 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -102,14 +102,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -static inline notrace int decrementer_check_overflow(void) -{ - u64 now = get_tb(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - return now >= *next_tb; -} - #ifdef CONFIG_PPC_BOOK3E /* This is called whenever we are re-enabling interrupts @@ -142,35 +134,6 @@ notrace unsigned int __check_irq_replay(void) trace_hardirqs_on(); trace_hardirqs_off(); - /* - * We are always hard disabled here, but PACA_IRQ_HARD_DIS may - * not be set, which means interrupts have only just been hard - * disabled as part of the local_irq_restore or interrupt return - * code. In that case, skip the decrementr check becaus it's - * expensive to read the TB. - * - * HARD_DIS then gets cleared here, but it's reconciled later. - * Either local_irq_disable will replay the interrupt and that - * will reconcile state like other hard interrupts. 
Or interrupt - * retur will replay the interrupt and in that case it sets - * PACA_IRQ_HARD_DIS by hand (see comments in entry_64.S). - */ - if (happened & PACA_IRQ_HARD_DIS) { - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. - */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) { - local_paca->irq_happened |= PACA_IRQ_DEC; - happened |= PACA_IRQ_DEC; - } - } - } - if (happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; return 0x900; @@ -186,6 +149,9 @@ notrace unsigned int __check_irq_replay(void) return 0x280; } + if (happened & PACA_IRQ_HARD_DIS) + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + /* There should be nothing left ! */ BUG_ON(local_paca->irq_happened != 0); @@ -214,13 +180,18 @@ void notrace restore_interrupts(void) void replay_soft_interrupts(void) { + struct pt_regs regs; + /* - * We use local_paca rather than get_paca() to avoid all - * the debug_smp_processor_id() business in this low level - * function + * Be careful here, calling these interrupt handlers can cause + * softirqs to be raised, which they may run when calling irq_exit, + * which will cause local_irq_enable() to be run, which can then + * recurse into this function. Don't keep any state across + * interrupt handler calls which may change underneath us. + * + * We use local_paca rather than get_paca() to avoid all the + * debug_smp_processor_id() business in this low level function. */ - unsigned char happened = local_paca->irq_happened; - struct pt_regs regs; ppc_save_regs(&regs); regs.softe = IRQS_ENABLED; @@ -229,18 +200,6 @@ void replay_soft_interrupts(void) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(mfmsr() & MSR_EE); - if (happened & PACA_IRQ_HARD_DIS) { - /* - * We may have missed a decrementer interrupt if hard disabled.
- * Check the decrementer register in case we had a rollover - * while hard disabled. - */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) - happened |= PACA_IRQ_DEC; - } - } - /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. @@ -255,7 +214,7 @@ void replay_soft_interrupts(void) * This is a higher priority interrupt than the others, so * replay it first. */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_HMI)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { local_paca->irq_happened &= ~PACA_IRQ_HMI; regs.trap = 0xe60; handle_hmi_exception(&regs); @@ -263,7 +222,7 @@ void replay_soft_interrupts(void) hard_irq_disable(); } - if (happened & PACA_IRQ_DEC) { + if (local_paca->irq_happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; regs.trap = 0x900; timer_interrupt(&regs); @@ -271,7 +230,7 @@ void replay_soft_interrupts(void) hard_irq_disable(); } - if (happened & PACA_IRQ_EE) { + if (local_paca->irq_happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; regs.trap = 0x500; do_IRQ(&regs); @@ -279,7 +238,7 @@ void replay_soft_interrupts(void) hard_irq_disable(); } - if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) { + if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; if (IS_ENABLED(CONFIG_PPC_BOOK3E)) regs.trap = 0x280; @@ -291,7 +250,7 @@ void replay_soft_interrupts(void) } /* Book3E does not support soft-masking PMI interrupts */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_PMI)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { local_paca->irq_happened &= ~PACA_IRQ_PMI; regs.trap = 0xf00; performance_monitor_exception(&regs); @@ -299,8 +258,7 @@ void replay_soft_interrupts(void) hard_irq_disable(); } - happened = local_paca->irq_happened; - if (happened &
~PACA_IRQ_HARD_DIS) { + if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { /* * We are responding to the next interrupt, so interrupt-off * latencies should be reset here. @@ -345,6 +303,7 @@ notrace void arch_local_irq_restore(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!(mfmsr() & MSR_EE)); __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; } else { /* * We should already be hard disabled here. We had bugs diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0ad15768d762cf..7f5aae3c387d29 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -208,7 +208,7 @@ static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) struct paca_struct **paca_ptrs __read_mostly; EXPORT_SYMBOL(paca_ptrs); -void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu) +void __init initialise_paca(struct paca_struct *new_paca, int cpu) { #ifdef CONFIG_PPC_PSERIES new_paca->lppaca_ptr = NULL; @@ -241,7 +241,7 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int } /* Put the paca pointer into r13 and SPRG_PACA */ -void __nostackprotector setup_paca(struct paca_struct *new_paca) +void setup_paca(struct paca_struct *new_paca) { /* Setup r13 */ local_paca = new_paca; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 954f41676f6925..cccb32cf0e08c7 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1030,7 +1030,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "ibm,display-message", -1, 0, -1, -1, -1 }, { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,open-errinct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, { "ibm,get-indices", -1, 2, 3, -1, -1 }, diff --git 
a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 808ec9fab6052f..da8c71f321ad3c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -919,8 +919,6 @@ void __init setup_arch(char **cmdline_p) /* On BookE, setup per-core TLB data structures. */ setup_tlb_core_data(); - - smp_release_cpus(); #endif /* Print various info about the machine that has been gathered so far. */ @@ -944,6 +942,8 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); + smp_release_cpus(); + initmem_init(); early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 2ec835574cc940..2dd0d9cb5a2086 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -8,12 +8,6 @@ #ifndef __ARCH_POWERPC_KERNEL_SETUP_H #define __ARCH_POWERPC_KERNEL_SETUP_H -#ifdef CONFIG_CC_IS_CLANG -#define __nostackprotector -#else -#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) -#endif - void initialize_cache_info(void); void irqstack_early_init(void); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 74fd47f46fa586..c28e949cc22294 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -283,7 +283,7 @@ void __init record_spr_defaults(void) * device-tree is not accessible via normal means at this point. 
*/ -void __init __nostackprotector early_setup(unsigned long dt_ptr) +void __init early_setup(unsigned long dt_ptr) { static __initdata struct paca_struct boot_paca; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8c2857cbd96097..7d6cf75a7fd801 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -919,7 +919,7 @@ static struct sched_domain_topology_level powerpc_topology[] = { { NULL, }, }; -static int init_big_cores(void) +static int __init init_big_cores(void) { int cpu; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 74efe46f55327f..7d372ff3504b27 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -552,14 +552,11 @@ void timer_interrupt(struct pt_regs *regs) struct pt_regs *old_regs; u64 now; - /* Some implementations of hotplug will get timer interrupts while - * offline, just ignore these and we also need to set - * decrementers_next_tb as MAX to make sure __check_irq_replay - * don't replay timer interrupt when return, otherwise we'll trap - * here infinitely :( + /* + * Some implementations of hotplug will get timer interrupts while + * offline, just ignore these. */ if (unlikely(!cpu_online(smp_processor_id()))) { - *next_tb = ~(u64)0; set_dec(decrementer_max); return; } diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 6db90cdf11da8c..4a1f494ef03f36 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -85,7 +85,7 @@ SECTIONS ALIGN_FUNCTION(); #endif /* careful! __ftr_alt_* sections need to be close to .text */ - *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text); + *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text); #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif @@ -145,6 +145,13 @@ SECTIONS __stop___entry_flush_fixup = .; } + . 
= ALIGN(8); + __scv_entry_flush_fixup : AT(ADDR(__scv_entry_flush_fixup) - LOAD_OFFSET) { + __start___scv_entry_flush_fixup = .; + *(__scv_entry_flush_fixup) + __stop___scv_entry_flush_fixup = .; + } + . = ALIGN(8); __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { __start___stf_exit_barrier_fixup = .; @@ -187,6 +194,12 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT + + /* + *.init.text might be RO so we must ensure this section ends on + * a page boundary. + */ + . = ALIGN(PAGE_SIZE); _einittext = .; #ifdef CONFIG_PPC64 *(.tramp.ftrace.init); @@ -200,21 +213,9 @@ SECTIONS EXIT_TEXT } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - INIT_SETUP(16) - } - - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - INIT_CALLS - } + . = ALIGN(PAGE_SIZE); - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - CON_INITCALL - } + INIT_DATA_SECTION(16) . 
= ALIGN(8); __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) { @@ -242,9 +243,6 @@ SECTIONS __stop___fw_ftr_fixup = .; } #endif - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - INIT_RAM_FS - } PERCPU_SECTION(L1_CACHE_BYTES) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 321c12a9ef6b8b..92705d6dfb6e0c 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -290,9 +290,6 @@ void do_entry_flush_fixups(enum l1d_flush_type types) long *start, *end; int i; - start = PTRRELOC(&__start___entry_flush_fixup); - end = PTRRELOC(&__stop___entry_flush_fixup); - instrs[0] = 0x60000000; /* nop */ instrs[1] = 0x60000000; /* nop */ instrs[2] = 0x60000000; /* nop */ @@ -312,6 +309,8 @@ void do_entry_flush_fixups(enum l1d_flush_type types) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; @@ -328,6 +327,25 @@ void do_entry_flush_fixups(enum l1d_flush_type types) patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); } + start = PTRRELOC(&__start___scv_entry_flush_fixup); + end = PTRRELOC(&__stop___scv_entry_flush_fixup); + for (; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + + if (types == L1D_FLUSH_FALLBACK) + patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback, + BRANCH_SET_LINK); + else + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + } + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : (types == L1D_FLUSH_FALLBACK) ? 
"fallback displacement" : diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 855457ed09b546..b18bce1a209fad 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1346,6 +1346,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, switch (opcode) { #ifdef __powerpc64__ case 1: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); rd = (suffix >> 21) & 0x1f; @@ -2733,6 +2736,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } break; case 1: /* Prefixed instructions */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); op->update_reg = ra; @@ -2751,6 +2757,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 41: /* plwa */ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4); break; +#ifdef CONFIG_VSX case 42: /* plxsd */ op->reg = rd + 32; op->type = MKOP(LOAD_VSX, PREFIXED, 8); @@ -2791,13 +2798,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; break; +#endif /* CONFIG_VSX */ case 56: /* plq */ op->type = MKOP(LOAD, PREFIXED, 16); break; case 57: /* pld */ op->type = MKOP(LOAD, PREFIXED, 8); break; - case 60: /* stq */ + case 60: /* pstq */ op->type = MKOP(STORE, PREFIXED, 16); break; case 61: /* pstd */ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0add963a849b3d..72e1b51beb10c5 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -303,7 +303,6 @@ static inline void cmo_account_page_fault(void) static inline void cmo_account_page_fault(void) { } #endif /* CONFIG_PPC_SMLPAR */ -#ifdef CONFIG_PPC_BOOK3S static void sanity_check_fault(bool is_write, bool is_user, unsigned long error_code, unsigned long address) { @@ -320,6 +319,9 @@ static void sanity_check_fault(bool is_write, bool is_user, return; } + if 
(!IS_ENABLED(CONFIG_PPC_BOOK3S)) + return; + /* * For hash translation mode, we should never get a * PROTFAULT. Any update to pte to reduce access will result in us @@ -354,10 +356,6 @@ static void sanity_check_fault(bool is_write, bool is_user, WARN_ON_ONCE(error_code & DSISR_PROTFAULT); } -#else -static void sanity_check_fault(bool is_write, bool is_user, - unsigned long error_code, unsigned long address) { } -#endif /* CONFIG_PPC_BOOK3S */ /* * Define the correct "is_write" bit in error_code based diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3fc325bebe4dff..22eb1c718e6224 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -532,7 +532,7 @@ void __flush_dcache_icache(void *p) * space occurs, before returning to user space. */ - if (cpu_has_feature(MMU_FTR_TYPE_44x)) + if (mmu_has_feature(MMU_FTR_TYPE_44x)) return; invalidate_icache_range(addr, addr + PAGE_SIZE); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 08643cba149482..43599e671d383f 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { } bool is_sier_available(void) { + if (!ppmu) + return false; + if (ppmu->flags & PPMU_HAS_SIER) return true; @@ -2121,6 +2124,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val, local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); + /* + * Due to hardware limitation, sometimes SIAR could sample a kernel + * address even when freeze on supervisor state (kernel) is set in + * MMCR2. Check attr.exclude_kernel and address to drop the sample in + * these cases. + */ + if (event->attr.exclude_kernel && record) + if (is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; + /* * Finally record data if requested. 
*/ diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 2848904df63832..e1a21d34c6e498 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -247,6 +247,9 @@ void isa207_get_mem_weight(u64 *weight) u64 sier = mfspr(SPRN_SIER); u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mantissa = P10_MMCRA_THR_CTR_MANT(mmcra); + if (val == 0 || val == 7) *weight = 0; else @@ -311,9 +314,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (unit >= 6 && unit <= 9) { - if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { - mask |= CNST_L2L3_GROUP_MASK; - value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (unit == 6) { + mask |= CNST_L2L3_GROUP_MASK; + value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); + } } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { mask |= CNST_CACHE_GROUP_MASK; value |= CNST_CACHE_GROUP_VAL(event & 0xff); @@ -339,12 +344,22 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) value |= CNST_L1_QUAL_VAL(cache); } + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mask |= CNST_RADIX_SCOPE_GROUP_MASK; + value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT); + } + if (is_event_marked(event)) { mask |= CNST_SAMPLE_MASK; value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); } - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (event_is_threshold(event)) { + mask |= CNST_THRESH_CTL_SEL_MASK; + value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT); + } + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { mask |= CNST_THRESH_MASK; value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); @@ -456,6 +471,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev, } } + /* Set 
RADIX_SCOPE_QUAL bit */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) & + p10_EVENT_RADIX_SCOPE_QUAL_MASK; + mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT; + } + if (is_event_marked(event[i])) { mmcra |= MMCRA_SAMPLE_ENABLE; diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 7025de5e60e7d8..454b32c3144065 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -101,6 +101,9 @@ #define p10_EVENT_CACHE_SEL_MASK 0x3ull #define p10_EVENT_MMCR3_MASK 0x7fffull #define p10_EVENT_MMCR3_SHIFT 45 +#define p10_EVENT_RADIX_SCOPE_QUAL_SHIFT 9 +#define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1 +#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45 #define p10_EVENT_VALID_MASK \ ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ @@ -112,6 +115,7 @@ (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + (p10_EVENT_RADIX_SCOPE_QUAL_MASK << p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) | \ EVENT_LINUX_MASK | \ EVENT_PSEL_MASK)) /* @@ -125,9 +129,9 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] - * | | | | - * BHRB IFM -* | | | Count of events for each PMC. + * [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1] + * | | | | | + * BHRB IFM -* | | |*radix_scope | Count of events for each PMC. * EBB -* | | p1, p2, p3, p4, p5, p6. 
* L1 I/D qualifier -* | * nc - number of counters -* @@ -145,6 +149,9 @@ #define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32) #define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK) +#define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32) +#define CNST_THRESH_CTL_SEL_MASK CNST_THRESH_CTL_SEL_VAL(0x7ff) + #define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) #define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK) @@ -165,6 +172,9 @@ #define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55) #define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f) +#define CNST_RADIX_SCOPE_GROUP_VAL(v) (((v) & 0x1ull) << 21) +#define CNST_RADIX_SCOPE_GROUP_MASK CNST_RADIX_SCOPE_GROUP_VAL(1) + /* * For NC we are counting up to 4 events. This requires three bits, and we need * the fifth event to overflow and set the 4th bit. To achieve that we bias the @@ -221,6 +231,10 @@ #define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\ MMCRA_THR_CTR_EXP_MASK) +#define P10_MMCRA_THR_CTR_MANT_MASK 0xFFul +#define P10_MMCRA_THR_CTR_MANT(v) (((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\ + P10_MMCRA_THR_CTR_MANT_MASK) + /* MMCRA Threshold Compare bit constant for power9 */ #define p9_MMCRA_THR_CMP_SHIFT 45 diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 9dbe8f9b89b4f2..cf44fb7446130c 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -23,10 +23,10 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] m [ pmcxsel ] - * | | | | | | - * | | | | | *- mark - * | | | *- L1/L2/L3 cache_sel | + * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] | m [ pmcxsel ] + * | | | | | | | + * | | | | | | *- mark + * | | | *- L1/L2/L3 cache_sel | |*-radix_scope_qual * | | sdar_mode | * | *- sampling mode for marked events *- combine * | @@ -59,6 +59,7 @@ * * MMCR1[16] = cache_sel[0] * MMCR1[17] = cache_sel[1] + * MMCR1[18] = radix_scope_qual * 
* if mark: * MMCRA[63] = 1 (SAMPLE_ENABLE) @@ -175,6 +176,7 @@ PMU_FORMAT_ATTR(src_sel, "config:45-46"); PMU_FORMAT_ATTR(invert_bit, "config:47"); PMU_FORMAT_ATTR(src_mask, "config:48-53"); PMU_FORMAT_ATTR(src_match, "config:54-59"); +PMU_FORMAT_ATTR(radix_scope, "config:9"); static struct attribute *power10_pmu_format_attr[] = { &format_attr_event.attr, @@ -194,6 +196,7 @@ static struct attribute *power10_pmu_format_attr[] = { &format_attr_invert_bit.attr, &format_attr_src_mask.attr, &format_attr_src_match.attr, + &format_attr_radix_scope.attr, NULL, }; diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c index aed4bc75f35205..aef179fcbd4f86 100644 --- a/arch/powerpc/platforms/8xx/micropatch.c +++ b/arch/powerpc/platforms/8xx/micropatch.c @@ -360,6 +360,17 @@ void __init cpm_load_patch(cpm8xx_t *cp) if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) { smc_uart_t *smp; + if (IS_ENABLED(CONFIG_PPC_EARLY_DEBUG_CPM)) { + int i; + + for (i = 0; i < sizeof(*smp); i += 4) { + u32 __iomem *src = (u32 __iomem *)&cp->cp_dparam[PROFF_SMC1 + i]; + u32 __iomem *dst = (u32 __iomem *)&cp->cp_dparam[PROFF_DSP1 + i]; + + out_be32(dst, in_be32(src)); + } + } + smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1]; out_be16(&smp->smc_rpbase, 0x1ec0); smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2]; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index c194c4ae8bc7d0..32a9c4c09b989e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -36,7 +36,7 @@ config PPC_BOOK3S_6xx select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select HAVE_ARCH_VMAP_STACK if !ADB_PMU + select HAVE_ARCH_VMAP_STACK config PPC_85xx bool "Freescale 85xx" diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index 7e0f8ba6e54a50..d497a60003d2d5 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ 
b/arch/powerpc/platforms/powermac/sleep.S @@ -44,7 +44,8 @@ #define SL_TB 0xa0 #define SL_R2 0xa8 #define SL_CR 0xac -#define SL_R12 0xb0 /* r12 to r31 */ +#define SL_LR 0xb0 +#define SL_R12 0xb4 /* r12 to r31 */ #define SL_SIZE (SL_R12 + 80) .section .text @@ -63,105 +64,107 @@ _GLOBAL(low_sleep_handler) blr #else mflr r0 - stw r0,4(r1) - stwu r1,-SL_SIZE(r1) + lis r11,sleep_storage@ha + addi r11,r11,sleep_storage@l + stw r0,SL_LR(r11) mfcr r0 - stw r0,SL_CR(r1) - stw r2,SL_R2(r1) - stmw r12,SL_R12(r1) + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) /* Save MSR & SDR1 */ mfmsr r4 - stw r4,SL_MSR(r1) + stw r4,SL_MSR(r11) mfsdr1 r4 - stw r4,SL_SDR1(r1) + stw r4,SL_SDR1(r11) /* Get a stable timebase and save it */ 1: mftbu r4 - stw r4,SL_TB(r1) + stw r4,SL_TB(r11) mftb r5 - stw r5,SL_TB+4(r1) + stw r5,SL_TB+4(r11) mftbu r3 cmpw r3,r4 bne 1b /* Save SPRGs */ mfsprg r4,0 - stw r4,SL_SPRG0(r1) + stw r4,SL_SPRG0(r11) mfsprg r4,1 - stw r4,SL_SPRG0+4(r1) + stw r4,SL_SPRG0+4(r11) mfsprg r4,2 - stw r4,SL_SPRG0+8(r1) + stw r4,SL_SPRG0+8(r11) mfsprg r4,3 - stw r4,SL_SPRG0+12(r1) + stw r4,SL_SPRG0+12(r11) /* Save BATs */ mfdbatu r4,0 - stw r4,SL_DBAT0(r1) + stw r4,SL_DBAT0(r11) mfdbatl r4,0 - stw r4,SL_DBAT0+4(r1) + stw r4,SL_DBAT0+4(r11) mfdbatu r4,1 - stw r4,SL_DBAT1(r1) + stw r4,SL_DBAT1(r11) mfdbatl r4,1 - stw r4,SL_DBAT1+4(r1) + stw r4,SL_DBAT1+4(r11) mfdbatu r4,2 - stw r4,SL_DBAT2(r1) + stw r4,SL_DBAT2(r11) mfdbatl r4,2 - stw r4,SL_DBAT2+4(r1) + stw r4,SL_DBAT2+4(r11) mfdbatu r4,3 - stw r4,SL_DBAT3(r1) + stw r4,SL_DBAT3(r11) mfdbatl r4,3 - stw r4,SL_DBAT3+4(r1) + stw r4,SL_DBAT3+4(r11) mfibatu r4,0 - stw r4,SL_IBAT0(r1) + stw r4,SL_IBAT0(r11) mfibatl r4,0 - stw r4,SL_IBAT0+4(r1) + stw r4,SL_IBAT0+4(r11) mfibatu r4,1 - stw r4,SL_IBAT1(r1) + stw r4,SL_IBAT1(r11) mfibatl r4,1 - stw r4,SL_IBAT1+4(r1) + stw r4,SL_IBAT1+4(r11) mfibatu r4,2 - stw r4,SL_IBAT2(r1) + stw r4,SL_IBAT2(r11) mfibatl r4,2 - stw r4,SL_IBAT2+4(r1) + stw 
r4,SL_IBAT2+4(r11) mfibatu r4,3 - stw r4,SL_IBAT3(r1) + stw r4,SL_IBAT3(r11) mfibatl r4,3 - stw r4,SL_IBAT3+4(r1) + stw r4,SL_IBAT3+4(r11) BEGIN_MMU_FTR_SECTION mfspr r4,SPRN_DBAT4U - stw r4,SL_DBAT4(r1) + stw r4,SL_DBAT4(r11) mfspr r4,SPRN_DBAT4L - stw r4,SL_DBAT4+4(r1) + stw r4,SL_DBAT4+4(r11) mfspr r4,SPRN_DBAT5U - stw r4,SL_DBAT5(r1) + stw r4,SL_DBAT5(r11) mfspr r4,SPRN_DBAT5L - stw r4,SL_DBAT5+4(r1) + stw r4,SL_DBAT5+4(r11) mfspr r4,SPRN_DBAT6U - stw r4,SL_DBAT6(r1) + stw r4,SL_DBAT6(r11) mfspr r4,SPRN_DBAT6L - stw r4,SL_DBAT6+4(r1) + stw r4,SL_DBAT6+4(r11) mfspr r4,SPRN_DBAT7U - stw r4,SL_DBAT7(r1) + stw r4,SL_DBAT7(r11) mfspr r4,SPRN_DBAT7L - stw r4,SL_DBAT7+4(r1) + stw r4,SL_DBAT7+4(r11) mfspr r4,SPRN_IBAT4U - stw r4,SL_IBAT4(r1) + stw r4,SL_IBAT4(r11) mfspr r4,SPRN_IBAT4L - stw r4,SL_IBAT4+4(r1) + stw r4,SL_IBAT4+4(r11) mfspr r4,SPRN_IBAT5U - stw r4,SL_IBAT5(r1) + stw r4,SL_IBAT5(r11) mfspr r4,SPRN_IBAT5L - stw r4,SL_IBAT5+4(r1) + stw r4,SL_IBAT5+4(r11) mfspr r4,SPRN_IBAT6U - stw r4,SL_IBAT6(r1) + stw r4,SL_IBAT6(r11) mfspr r4,SPRN_IBAT6L - stw r4,SL_IBAT6+4(r1) + stw r4,SL_IBAT6+4(r11) mfspr r4,SPRN_IBAT7U - stw r4,SL_IBAT7(r1) + stw r4,SL_IBAT7(r11) mfspr r4,SPRN_IBAT7L - stw r4,SL_IBAT7+4(r1) + stw r4,SL_IBAT7+4(r11) END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) /* Backup various CPU config stuffs */ @@ -180,9 +183,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) lis r5,grackle_wake_up@ha addi r5,r5,grackle_wake_up@l tophys(r5,r5) - stw r5,SL_PC(r1) + stw r5,SL_PC(r11) lis r4,KERNELBASE@h - tophys(r5,r1) + tophys(r5,r11) addi r5,r5,SL_PC lis r6,MAGIC@ha addi r6,r6,MAGIC@l @@ -194,12 +197,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) tophys(r3,r3) stw r3,0x80(r4) stw r5,0x84(r4) - /* Store a pointer to our backup storage into - * a kernel global - */ - lis r3,sleep_storage@ha - addi r3,r3,sleep_storage@l - stw r5,0(r3) .globl low_cpu_offline_self low_cpu_offline_self: @@ -279,7 +276,7 @@ _GLOBAL(core99_wake_up) lis r3,sleep_storage@ha 
addi r3,r3,sleep_storage@l tophys(r3,r3) - lwz r1,0(r3) + addi r1,r3,SL_PC /* Pass thru to older resume code ... */ _ASM_NOKPROBE_SYMBOL(core99_wake_up) @@ -399,13 +396,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blt 1b sync - /* restore the MSR and turn on the MMU */ - lwz r3,SL_MSR(r1) - bl turn_on_mmu - - /* get back the stack pointer */ - tovirt(r1,r1) - /* Restore TB */ li r3,0 mttbl r3 @@ -419,28 +409,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtcr r0 lwz r2,SL_R2(r1) lmw r12,SL_R12(r1) - addi r1,r1,SL_SIZE - lwz r0,4(r1) - mtlr r0 - blr -_ASM_NOKPROBE_SYMBOL(grackle_wake_up) -turn_on_mmu: - mflr r4 - tovirt(r4,r4) + /* restore the MSR and SP and turn on the MMU and return */ + lwz r3,SL_MSR(r1) + lwz r4,SL_LR(r1) + lwz r1,SL_SP(r1) mtsrr0 r4 mtsrr1 r3 sync isync rfi -_ASM_NOKPROBE_SYMBOL(turn_on_mmu) +_ASM_NOKPROBE_SYMBOL(grackle_wake_up) #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ - .section .data + .section .bss .balign L1_CACHE_BYTES sleep_storage: - .long 0 + .space SL_SIZE .balign L1_CACHE_BYTES, 0 #endif /* CONFIG_PPC_BOOK3S_32 */ diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 6828108486f836..0e42fe2d7b6ac9 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -30,6 +30,7 @@ struct memtrace_entry { char name[16]; }; +static DEFINE_MUTEX(memtrace_mutex); static u64 memtrace_size; static struct memtrace_entry *memtrace_array; @@ -67,6 +68,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg) return 0; } +static void memtrace_clear_range(unsigned long start_pfn, + unsigned long nr_pages) +{ + unsigned long pfn; + + /* + * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM + * does not apply, avoid passing around "struct page" and use + * clear_page() instead directly. 
+ */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { + if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) + cond_resched(); + clear_page(__va(PFN_PHYS(pfn))); + } +} + /* called with device_hotplug_lock held */ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) { @@ -111,6 +129,11 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) lock_device_hotplug(); for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { + /* + * Clear the range while we still have a linear + * mapping. + */ + memtrace_clear_range(base_pfn, nr_pages); /* * Remove memory in memory block size chunks so that * iomem resources are always split to the same size and @@ -257,6 +280,7 @@ static int memtrace_online(void) static int memtrace_enable_set(void *data, u64 val) { + int rc = -EAGAIN; u64 bytes; /* @@ -269,25 +293,31 @@ static int memtrace_enable_set(void *data, u64 val) return -EINVAL; } + mutex_lock(&memtrace_mutex); + /* Re-add/online previously removed/offlined memory */ if (memtrace_size) { if (memtrace_online()) - return -EAGAIN; + goto out_unlock; } - if (!val) - return 0; + if (!val) { + rc = 0; + goto out_unlock; + } /* Offline and remove memory */ if (memtrace_init_regions_runtime(val)) - return -EINVAL; + goto out_unlock; if (memtrace_init_debugfs()) - return -EINVAL; + goto out_unlock; memtrace_size = val; - - return 0; + rc = 0; +out_unlock: + mutex_unlock(&memtrace_mutex); + return rc; } static int memtrace_enable_get(void *data, u64 *val) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index abeaa533b976be..b711dc3262a308 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -385,7 +385,8 @@ static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group) for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if 
(!pe->table_group.ops->take_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->take_ownership) continue; pe->table_group.ops->take_ownership(&pe->table_group); } @@ -401,7 +402,8 @@ static void pnv_npu_peers_release_ownership( for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->release_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->release_ownership) continue; pe->table_group.ops->release_ownership(&pe->table_group); } @@ -623,6 +625,11 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n", @@ -670,6 +677,11 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d95954ad4c0af5..c61c3b62c8c628 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -731,7 +731,7 @@ int opal_hmi_exception_early2(struct pt_regs *regs) return 1; } -/* HMI exception handler called in virtual mode during check_irq_replay. */ +/* HMI exception handler called in virtual mode when irqs are next enabled. 
*/ int opal_handle_hmi_exception(struct pt_regs *regs) { /* diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c index c4434f20f42fae..28aac933a43917 100644 --- a/arch/powerpc/platforms/powernv/pci-sriov.c +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -422,7 +422,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) { struct pnv_iov_data *iov; struct pnv_phb *phb; - unsigned int win; + int win; struct resource *res; int i, j; int64_t rc; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index a02012f1b04afd..12cbffd3c2e32c 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -746,6 +746,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) parent = of_find_node_by_path("/cpus"); if (!parent) { pr_warn("Could not find CPU root node in device tree\n"); + kfree(cpu_drcs); return -1; } diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 81e0ac58d62047..64b36a93c33a63 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -13,7 +13,6 @@ #include #include #include -#include "../../kernel/cacheinfo.h" static u64 stream_id; static struct device suspend_dev; @@ -78,9 +77,7 @@ static void pseries_suspend_enable_irqs(void) * Update configuration which can be modified based on device tree * changes during resume. 
*/ - cacheinfo_cpu_offline(smp_processor_id()); post_mobility_fixup(); - cacheinfo_cpu_online(smp_processor_id()); } /** @@ -187,7 +184,6 @@ static struct bus_type suspend_subsys = { static const struct platform_suspend_ops pseries_suspend_ops = { .valid = suspend_valid_only_mem, - .begin = pseries_suspend_begin, .prepare_late = pseries_prepare_late, .enter = pseries_suspend_enter, }; diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index f6b253e2be4097..36ec0bdd8b63c4 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -191,7 +191,7 @@ static int mpic_msgr_probe(struct platform_device *dev) /* IO map the message register block. */ of_address_to_resource(np, 0, &rsrc); - msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc)); + msgr_block_addr = devm_ioremap(&dev->dev, rsrc.start, resource_size(&rsrc)); if (!msgr_block_addr) { dev_err(&dev->dev, "Failed to iomap MPIC message registers"); return -EFAULT; diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c index 5c1a50912229a0..9b0d85bff021e9 100644 --- a/arch/powerpc/xmon/nonstdio.c +++ b/arch/powerpc/xmon/nonstdio.c @@ -178,7 +178,7 @@ void xmon_printf(const char *format, ...) 
if (n && rc == 0) { /* No udbg hooks, fallback to printk() - dangerous */ - printk("%s", xmon_outbuf); + pr_cont("%s", xmon_outbuf); } } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 55c43a6c91112a..5559edf36756c4 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1383,6 +1383,7 @@ static long check_bp_loc(unsigned long addr) return 1; } +#ifndef CONFIG_PPC_8xx static int find_free_data_bpt(void) { int i; @@ -1394,6 +1395,7 @@ static int find_free_data_bpt(void) printf("Couldn't find free breakpoint register\n"); return -1; } +#endif static void print_data_bpts(void) { diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 44377fd7860e43..234a21d26f6743 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -134,7 +134,7 @@ config PA_BITS config PAGE_OFFSET hex - default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB + default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB default 0x80000000 if 64BIT && !MMU default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB @@ -247,10 +247,12 @@ config MODULE_SECTIONS choice prompt "Maximum Physical Memory" - default MAXPHYSMEM_2GB if 32BIT + default MAXPHYSMEM_1GB if 32BIT default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY + config MAXPHYSMEM_1GB + bool "1GiB" config MAXPHYSMEM_2GB bool "2GiB" config MAXPHYSMEM_128GB diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 4a2729f5ca3f01..24d75a146e02d4 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -88,7 +88,9 @@ phy-mode = "gmii"; phy-handle = <&phy0>; phy0: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.0771"; reg = <0>; + reset-gpios = <&gpio 12 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index d222d353d86d40..8c3d1e4517031a 
100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -64,6 +64,8 @@ CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y CONFIG_SPI_SIFIVE=y +CONFIG_GPIOLIB=y +CONFIG_GPIO_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_POWER_RESET=y CONFIG_DRM=y diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 59dd7be550054f..445ccc97305a5e 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -3,6 +3,5 @@ generic-y += early_ioremap.h generic-y += extable.h generic-y += flat.h generic-y += kvm_para.h -generic-y += local64.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 183f1f4b2ae66d..73e8b5e5bb6544 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -99,7 +99,6 @@ | _PAGE_DIRTY) #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) #define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) #define PAGE_KERNEL_READ_EXEC __pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \ diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 8454f746bbfd0f..1453a2f563bcc8 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -10,7 +10,7 @@ #include -#ifndef GENERIC_TIME_VSYSCALL +#ifndef CONFIG_GENERIC_TIME_VSYSCALL struct vdso_data { }; #endif diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index de59dd457b4158..d8678135704425 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -26,7 +26,16 @@ cache_get_priv_group(struct cacheinfo *this_leaf) static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id()); + /* + * Using raw_smp_processor_id() elides a preemptability check, but this + 
* is really indicative of a larger problem: the cacheinfo UABI assumes + * that cores have a homogeneous view of the cache hierarchy. That + * happens to be the case for the current set of RISC-V systems, but + * likely won't be true in general. Since there's no way to provide + * correct information for these systems via the current UABI we're + * just eliding the check for now. + */ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(raw_smp_processor_id()); struct cacheinfo *this_leaf; int index; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 524d918f3601b2..744f3209c48d0b 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -124,15 +124,15 @@ skip_context_tracking: REG_L a1, (a1) jr a1 1: -#ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_on -#endif /* * Exceptions run with interrupts enabled or disabled depending on the * state of SR_PIE in m/sstatus. */ andi t0, s1, SR_PIE beqz t0, 1f +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_on +#endif csrs CSR_STATUS, SR_IE 1: @@ -155,6 +155,15 @@ skip_context_tracking: tail do_trap_unknown handle_syscall: +#ifdef CONFIG_RISCV_M_MODE + /* + * When running in M-Mode (no MMU config), MPIE does not get set. + * As a result, we need to force enable interrupts here because + * handle_exception did not set SR_IE as it always sees SR_PIE + * being cleared. + */ + csrs CSR_STATUS, SR_IE +#endif #if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) /* Recover a0 - a7 for system calls */ REG_L a0, PT_A0(sp) @@ -186,14 +195,7 @@ check_syscall_nr: * Syscall number held in a7. * If syscall number is above allowed value, redirect to ni_syscall. */ - bge a7, t0, 1f - /* - * Check if syscall is rejected by tracer, i.e., a7 == -1. - * If yes, we pretend it was executed. 
- */ - li t1, -1 - beq a7, t1, ret_from_syscall_rejected - blt a7, t1, 1f + bgeu a7, t0, 1f /* Call syscall */ la s0, sys_call_table slli t0, a7, RISCV_LGPTR diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 4d3a1048ad8b1c..8a5cf99c077624 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -4,6 +4,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -24,6 +25,8 @@ void __init time_init(void) riscv_timebase = prop; lpj_fine = riscv_timebase / HZ; + + of_clk_init(NULL); timer_probe(); } diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 678204231700ca..3f1d35e7c98a62 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -12,7 +12,7 @@ #include #include #include -#ifdef GENERIC_TIME_VSYSCALL +#ifdef CONFIG_GENERIC_TIME_VSYSCALL #include #else #include diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 8e577f14f1205d..608082fb9a6c6c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -155,9 +155,10 @@ static void __init setup_initrd(void) void __init setup_bootmem(void) { phys_addr_t mem_start = 0; - phys_addr_t start, end = 0; + phys_addr_t start, dram_end, end = 0; phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); + phys_addr_t max_mapped_addr = __pa(~(ulong)0); u64 i; /* Find the memory region containing the kernel */ @@ -174,12 +175,23 @@ void __init setup_bootmem(void) * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed * as it is unusable by kernel. 
*/ - memblock_enforce_memory_limit(mem_start - PAGE_OFFSET); + memblock_enforce_memory_limit(-PAGE_OFFSET); /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + dram_end = memblock_end_of_DRAM(); + + /* + * memblock allocator is not aware of the fact that last 4K bytes of + * the addressable memory can not be mapped because of IS_ERR_VALUE + * macro. Make sure that last 4k bytes are not usable by memblock + * if end of dram is equal to maximum addressable memory. + */ + if (max_mapped_addr == (dram_end - 1)) + memblock_set_current_limit(max_mapped_addr - 4096); + + max_pfn = PFN_DOWN(dram_end); max_low_pfn = max_pfn; set_max_mapnr(max_low_pfn); diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 12ddd1f6bf70c8..a8a2ffd9114aaa 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -93,8 +93,8 @@ void __init kasan_init(void) VMALLOC_END)); for_each_mem_range(i, &_start, &_end) { - void *start = (void *)_start; - void *end = (void *)_end; + void *start = (void *)__va(_start); + void *end = (void *)__va(_end); if (start >= end) break; diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index a15c033f53ca42..87641dd65ccf91 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -35,7 +35,7 @@ void uv_query_info(void) uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len; uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE); uv_info.max_num_sec_conf = uvcb.max_num_sec_conf; - uv_info.max_guest_cpus = uvcb.max_guest_cpus; + uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id; } #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 319efa0e6d024f..1a18d7b82f86d7 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -7,5 +7,4 @@ generated-y += unistd_nr.h generic-y += asm-offsets.h generic-y += 
export.h generic-y += kvm_types.h -generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0325fc0469b7b0..7b98d4caee779c 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -96,7 +96,7 @@ struct uv_cb_qui { u32 max_num_sec_conf; u64 max_guest_stor_addr; u8 reserved88[158 - 136]; - u16 max_guest_cpus; + u16 max_guest_cpu_id; u8 reserveda0[200 - 160]; } __packed __aligned(8); @@ -273,7 +273,7 @@ struct uv_info { unsigned long guest_cpu_stor_len; unsigned long max_sec_stor_addr; unsigned int max_num_sec_conf; - unsigned short max_guest_cpus; + unsigned short max_guest_cpu_id; }; extern struct uv_info uv_info; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 92beb14446449e..71203324ff42b0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -110,9 +110,9 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro SWITCH_ASYNC savearea,timer + .macro SWITCH_ASYNC savearea,timer,clock tmhh %r8,0x0001 # interrupting from user ? 
- jnz 2f + jnz 4f #if IS_ENABLED(CONFIG_KVM) lgr %r14,%r9 larl %r13,.Lsie_gmap @@ -125,10 +125,26 @@ _LPP_OFFSET = __LC_LPP #endif 0: larl %r13,.Lpsw_idle_exit cgr %r13,%r9 - jne 1f + jne 3f - mvc __CLOCK_IDLE_EXIT(8,%r2), __LC_INT_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2), __LC_ASYNC_ENTER_TIMER + larl %r1,smp_cpu_mtid + llgf %r1,0(%r1) + ltgr %r1,%r1 + jz 2f # no SMT, skip mt_cycles calculation + .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) + larl %r3,mt_cycles + ag %r3,__LC_PERCPU_OFFSET + la %r4,__SF_EMPTY+16(%r15) +1: lg %r0,0(%r3) + slg %r0,0(%r4) + alg %r0,64(%r4) + stg %r0,0(%r3) + la %r3,8(%r3) + la %r4,8(%r4) + brct %r1,1b + +2: mvc __CLOCK_IDLE_EXIT(8,%r2), \clock + mvc __TIMER_IDLE_EXIT(8,%r2), \timer # account system time going idle ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT @@ -146,17 +162,17 @@ _LPP_OFFSET = __LC_LPP mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) nihh %r8,0xfcfd # clear wait state and irq bits -1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? +3: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? 
slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT - jnz 3f + jnz 5f CHECK_STACK \savearea aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 4f -2: UPDATE_VTIME %r14,%r15,\timer + j 6f +4: UPDATE_VTIME %r14,%r15,\timer BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -3: lg %r15,__LC_ASYNC_STACK # load async stack -4: la %r11,STACK_FRAME_OVERHEAD(%r15) +5: lg %r15,__LC_ASYNC_STACK # load async stack +6: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro UPDATE_VTIME w1,w2,enter_timer @@ -390,6 +406,7 @@ ENTRY(system_call) mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC stg %r14,__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) ENABLE_INTS .Lsysc_do_svc: # clear user controlled register to prevent speculative use @@ -406,7 +423,6 @@ ENTRY(system_call) jnl .Lsysc_nr_ok slag %r8,%r1,3 .Lsysc_nr_ok: - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stg %r2,__PT_ORIG_GPR2(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lg %r9,0(%r8,%r10) # get system call add. @@ -696,8 +712,8 @@ ENTRY(pgm_check_handler) mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -6: RESTORE_SM_CLEAR_PER - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +6: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + RESTORE_SM_CLEAR_PER larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) nill %r10,0x007f @@ -718,8 +734,8 @@ ENTRY(pgm_check_handler) # PER event in supervisor state, must be kprobes # .Lpgm_kprobe: - RESTORE_SM_CLEAR_PER xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + RESTORE_SM_CLEAR_PER lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_per_trap j .Lpgm_return @@ -745,7 +761,7 @@ ENTRY(io_int_handler) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_IO_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers 
to prevent speculative use xgr %r0,%r0 @@ -761,10 +777,10 @@ ENTRY(io_int_handler) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore TRACE_IRQS_OFF - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) .Lio_loop: lgr %r2,%r11 # pass pointer to pt_regs lghi %r3,IO_INTERRUPT @@ -945,7 +961,7 @@ ENTRY(ext_int_handler) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_EXT_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 @@ -964,10 +980,10 @@ ENTRY(ext_int_handler) mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore TRACE_IRQS_OFF - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs lghi %r3,EXT_INTERRUPT brasl %r14,do_IRQ @@ -1167,7 +1183,7 @@ ENTRY(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic 4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER,__LC_MCCK_CLOCK .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 390d97daa2b3ff..3a0d545f0ce844 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -896,24 +896,12 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid) /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct pcpu *pcpu; - int base, i, rc; + struct pcpu *pcpu = 
pcpu_devices + cpu; + int rc; - pcpu = pcpu_devices + cpu; if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; - base = smp_get_base_cpu(cpu); - for (i = 0; i <= smp_cpu_mtid; i++) { - if (base + i < nr_cpu_ids) - if (cpu_online(base + i)) - break; - } - /* - * If this is the first CPU of the core to get online - * do an initial CPU reset. - */ - if (i > smp_cpu_mtid && - pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) != + if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 883bfed9f5c2ce..b2d2ad1530676d 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -368,7 +368,7 @@ static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, struct kobj_attribute *attr, char *page) { return scnprintf(page, PAGE_SIZE, "%d\n", - uv_info.max_guest_cpus); + uv_info.max_guest_cpu_id + 1); } static struct kobj_attribute uv_query_max_guest_cpus_attr = diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 7c988994931f03..6bad84c372dcbb 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -205,12 +205,15 @@ static noinline int unwindme_func3(struct unwindme *u) /* This function must appear in the backtrace. 
*/ static noinline int unwindme_func2(struct unwindme *u) { + unsigned long flags; int rc; if (u->flags & UWM_SWITCH_STACK) { - preempt_disable(); + local_irq_save(flags); + local_mcck_disable(); rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u); - preempt_enable(); + local_mcck_enable(); + local_irq_restore(flags); return rc; } else { return unwindme_func3(u); diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 5a10ce34b95d10..3d1c31e0cf3dd7 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -62,14 +62,15 @@ jh 10b .endm -.macro START_NEXT_KERNEL base +.macro START_NEXT_KERNEL base subcode lg %r4,kernel_entry-\base(%r13) lg %r5,load_psw_mask-\base(%r13) ogr %r4,%r5 stg %r4,0(%r0) xgr %r0,%r0 - diag %r0,%r0,0x308 + lghi %r1,\subcode + diag %r0,%r1,0x308 .endm .text @@ -123,7 +124,7 @@ ENTRY(purgatory_start) je .start_crash_kernel /* start normal kernel */ - START_NEXT_KERNEL .base_crash + START_NEXT_KERNEL .base_crash 0 .return_old_kernel: lmg %r6,%r15,gprregs-.base_crash(%r13) @@ -227,7 +228,7 @@ ENTRY(purgatory_start) MEMCPY %r9,%r10,%r11 /* start crash kernel */ - START_NEXT_KERNEL .base_dst + START_NEXT_KERNEL .base_dst 1 load_psw_mask: diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 159da4ed578f23..b6f3d49991d37f 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -30,7 +30,6 @@ config SUPERH select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DYNAMIC_FTRACE diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig index d0de378beefe5a..7d54f284ce10fb 100644 --- a/arch/sh/drivers/dma/Kconfig +++ b/arch/sh/drivers/dma/Kconfig @@ -63,8 +63,7 @@ config PVR2_DMA config G2_DMA tristate "G2 Bus DMA support" - depends on SH_DREAMCAST - select SH_DMA_API + depends on SH_DREAMCAST && SH_DMA_API help This enables support for the DMA controller for the 
Dreamcast's G2 bus. Drivers that want this will generally enable this on diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 7435182ef84658..fc44d9c88b4191 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += syscall_table.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 5269a704801fa4..3688fdae50e45c 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -6,5 +6,4 @@ generated-y += syscall_table_64.h generated-y += syscall_table_c32.h generic-y += export.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 96edf64d4fb304..182bb7bdaa0a15 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2894,7 +2894,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm) if (!page) return NULL; if (!pgtable_pte_page_ctor(page)) { - free_unref_page(page); + __free_page(page); return NULL; } return (pte_t *) page_address(page); diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index 4d80526a4236e6..d8845d4aac6a75 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -26,10 +26,10 @@ int generic_read(int fd, char *c_out, void *unused) n = read(fd, c_out, sizeof(*c_out)); if (n > 0) return n; - else if (errno == EAGAIN) - return 0; else if (n == 0) return -EIO; + else if (errno == EAGAIN) + return 0; return -errno; } diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index ce115fce52f02a..e4b9b2ce9abf43 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,9 +19,8 @@ #include /* - * core module and version information + * core module information */ 
-#define RNG_VERSION "1.0.0" #define RNG_MODULE_NAME "hw_random" /* Changed at init time, in the non-modular case, and at module load @@ -28,88 +28,36 @@ * protects against a module being loaded twice at the same time. */ static int random_fd = -1; -static DECLARE_WAIT_QUEUE_HEAD(host_read_wait); +static struct hwrng hwrng = { 0, }; +static DECLARE_COMPLETION(have_data); -static int rng_dev_open (struct inode *inode, struct file *filp) +static int rng_dev_read(struct hwrng *rng, void *buf, size_t max, bool block) { - /* enforce read-only access to this chrdev */ - if ((filp->f_mode & FMODE_READ) == 0) - return -EINVAL; - if ((filp->f_mode & FMODE_WRITE) != 0) - return -EINVAL; + int ret; - return 0; -} - -static atomic_t host_sleep_count = ATOMIC_INIT(0); - -static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, - loff_t *offp) -{ - u32 data; - int n, ret = 0, have_data; - - while (size) { - n = os_read_file(random_fd, &data, sizeof(data)); - if (n > 0) { - have_data = n; - while (have_data && size) { - if (put_user((u8) data, buf++)) { - ret = ret ? : -EFAULT; - break; - } - size--; - ret++; - have_data--; - data >>= 8; - } - } - else if (n == -EAGAIN) { - DECLARE_WAITQUEUE(wait, current); - - if (filp->f_flags & O_NONBLOCK) - return ret ? : -EAGAIN; - - atomic_inc(&host_sleep_count); + for (;;) { + ret = os_read_file(random_fd, buf, max); + if (block && ret == -EAGAIN) { add_sigio_fd(random_fd); - add_wait_queue(&host_read_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); + ret = wait_for_completion_killable(&have_data); - schedule(); - remove_wait_queue(&host_read_wait, &wait); + ignore_sigio_fd(random_fd); + deactivate_fd(random_fd, RANDOM_IRQ); - if (atomic_dec_and_test(&host_sleep_count)) { - ignore_sigio_fd(random_fd); - deactivate_fd(random_fd, RANDOM_IRQ); - } + if (ret < 0) + break; + } else { + break; } - else - return n; - - if (signal_pending (current)) - return ret ? 
: -ERESTARTSYS; } - return ret; -} -static const struct file_operations rng_chrdev_ops = { - .owner = THIS_MODULE, - .open = rng_dev_open, - .read = rng_dev_read, - .llseek = noop_llseek, -}; - -/* rng_init shouldn't be called more than once at boot time */ -static struct miscdevice rng_miscdev = { - HWRNG_MINOR, - RNG_MODULE_NAME, - &rng_chrdev_ops, -}; + return ret != -EAGAIN ? ret : 0; +} static irqreturn_t random_interrupt(int irq, void *data) { - wake_up(&host_read_wait); + complete(&have_data); return IRQ_HANDLED; } @@ -126,18 +74,19 @@ static int __init rng_init (void) goto out; random_fd = err; - err = um_request_irq(RANDOM_IRQ, random_fd, IRQ_READ, random_interrupt, 0, "random", NULL); if (err) goto err_out_cleanup_hw; sigio_broken(random_fd, 1); + hwrng.name = RNG_MODULE_NAME; + hwrng.read = rng_dev_read; + hwrng.quality = 1024; - err = misc_register (&rng_miscdev); + err = hwrng_register(&hwrng); if (err) { - printk (KERN_ERR RNG_MODULE_NAME ": misc device register " - "failed\n"); + pr_err(RNG_MODULE_NAME " registering failed (%d)\n", err); goto err_out_cleanup_hw; } out: @@ -161,8 +110,8 @@ static void cleanup(void) static void __exit rng_cleanup(void) { + hwrng_unregister(&hwrng); os_close_file(random_fd); - misc_deregister (&rng_miscdev); } module_init (rng_init); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index eae8c83364f718..b12c1b0d3e1d0b 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -47,18 +47,25 @@ /* Max request size is determined by sector mask - 32K */ #define UBD_MAX_REQUEST (8 * sizeof(long)) +struct io_desc { + char *buffer; + unsigned long length; + unsigned long sector_mask; + unsigned long long cow_offset; + unsigned long bitmap_words[2]; +}; + struct io_thread_req { struct request *req; int fds[2]; unsigned long offsets[2]; unsigned long long offset; - unsigned long length; - char *buffer; int sectorsize; - unsigned long sector_mask; - unsigned long long cow_offset; - unsigned 
long bitmap_words[2]; int error; + + int desc_cnt; + /* io_desc has to be the last element of the struct */ + struct io_desc io_desc[]; }; @@ -525,12 +532,7 @@ static void ubd_handler(void) blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q); } - if ((io_req->error) || (io_req->buffer == NULL)) - blk_mq_end_request(io_req->req, io_req->error); - else { - if (!blk_update_request(io_req->req, io_req->error, io_req->length)) - __blk_mq_end_request(io_req->req, io_req->error); - } + blk_mq_end_request(io_req->req, io_req->error); kfree(io_req); } } @@ -946,6 +948,7 @@ static int ubd_add(int n, char **error_out) blk_queue_write_cache(ubd_dev->queue, true, false); blk_queue_max_segments(ubd_dev->queue, MAX_SG); + blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; @@ -1289,37 +1292,74 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, *cow_offset += bitmap_offset; } -static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, +static void cowify_req(struct io_thread_req *req, struct io_desc *segment, + unsigned long offset, unsigned long *bitmap, __u64 bitmap_offset, __u64 bitmap_len) { - __u64 sector = req->offset >> SECTOR_SHIFT; + __u64 sector = offset >> SECTOR_SHIFT; int i; - if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT) + if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT) panic("Operation too long"); if (req_op(req->req) == REQ_OP_READ) { - for (i = 0; i < req->length >> SECTOR_SHIFT; i++) { + for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) { if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) ubd_set_bit(i, (unsigned char *) - &req->sector_mask); + &segment->sector_mask); + } + } else { + cowify_bitmap(offset, segment->length, &segment->sector_mask, + &segment->cow_offset, bitmap, 
bitmap_offset, + segment->bitmap_words, bitmap_len); + } +} + +static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req, + struct request *req) +{ + struct bio_vec bvec; + struct req_iterator iter; + int i = 0; + unsigned long byte_offset = io_req->offset; + int op = req_op(req); + + if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) { + io_req->io_desc[0].buffer = NULL; + io_req->io_desc[0].length = blk_rq_bytes(req); + } else { + rq_for_each_segment(bvec, req, iter) { + BUG_ON(i >= io_req->desc_cnt); + + io_req->io_desc[i].buffer = + page_address(bvec.bv_page) + bvec.bv_offset; + io_req->io_desc[i].length = bvec.bv_len; + i++; + } + } + + if (dev->cow.file) { + for (i = 0; i < io_req->desc_cnt; i++) { + cowify_req(io_req, &io_req->io_desc[i], byte_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + byte_offset += io_req->io_desc[i].length; } + } - else cowify_bitmap(req->offset, req->length, &req->sector_mask, - &req->cow_offset, bitmap, bitmap_offset, - req->bitmap_words, bitmap_len); } -static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, - u64 off, struct bio_vec *bvec) +static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req, + int desc_cnt) { - struct ubd *dev = hctx->queue->queuedata; struct io_thread_req *io_req; - int ret; + int i; - io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + io_req = kmalloc(sizeof(*io_req) + + (desc_cnt * sizeof(struct io_desc)), + GFP_ATOMIC); if (!io_req) - return -ENOMEM; + return NULL; io_req->req = req; if (dev->cow.file) @@ -1327,26 +1367,41 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, else io_req->fds[0] = dev->fd; io_req->error = 0; - - if (bvec != NULL) { - io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset; - io_req->length = bvec->bv_len; - } else { - io_req->buffer = NULL; - io_req->length = blk_rq_bytes(req); - } - io_req->sectorsize = SECTOR_SIZE; io_req->fds[1] = 
dev->fd; - io_req->cow_offset = -1; - io_req->offset = off; - io_req->sector_mask = 0; + io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - if (dev->cow.file) - cowify_req(io_req, dev->cow.bitmap, - dev->cow.bitmap_offset, dev->cow.bitmap_len); + for (i = 0 ; i < desc_cnt; i++) { + io_req->io_desc[i].sector_mask = 0; + io_req->io_desc[i].cow_offset = -1; + } + + return io_req; +} + +static int ubd_submit_request(struct ubd *dev, struct request *req) +{ + int segs = 0; + struct io_thread_req *io_req; + int ret; + int op = req_op(req); + + if (op == REQ_OP_FLUSH) + segs = 0; + else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) + segs = 1; + else + segs = blk_rq_nr_phys_segments(req); + + io_req = ubd_alloc_req(dev, req, segs); + if (!io_req) + return -ENOMEM; + + io_req->desc_cnt = segs; + if (segs) + ubd_map_req(dev, io_req, req); ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); if (ret != sizeof(io_req)) { @@ -1357,22 +1412,6 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, return ret; } -static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req) -{ - struct req_iterator iter; - struct bio_vec bvec; - int ret; - u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT; - - rq_for_each_segment(bvec, req, iter) { - ret = ubd_queue_one_vec(hctx, req, off, &bvec); - if (ret < 0) - return ret; - off += bvec.bv_len; - } - return 0; -} - static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1385,17 +1424,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(&ubd_dev->lock); switch (req_op(req)) { - /* operations with no lentgth/offset arguments */ case REQ_OP_FLUSH: - ret = ubd_queue_one_vec(hctx, req, 0, NULL); - break; case REQ_OP_READ: case REQ_OP_WRITE: - ret = queue_rw_req(hctx, req); - break; case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: - ret = 
ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL); + ret = ubd_submit_request(ubd_dev, req); break; default: WARN_ON_ONCE(1); @@ -1483,22 +1517,22 @@ static int map_error(int error_code) * will result in unpredictable behaviour and/or crashes. */ -static int update_bitmap(struct io_thread_req *req) +static int update_bitmap(struct io_thread_req *req, struct io_desc *segment) { int n; - if(req->cow_offset == -1) + if (segment->cow_offset == -1) return map_error(0); - n = os_pwrite_file(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words), req->cow_offset); - if (n != sizeof(req->bitmap_words)) + n = os_pwrite_file(req->fds[1], &segment->bitmap_words, + sizeof(segment->bitmap_words), segment->cow_offset); + if (n != sizeof(segment->bitmap_words)) return map_error(-n); return map_error(0); } -static void do_io(struct io_thread_req *req) +static void do_io(struct io_thread_req *req, struct io_desc *desc) { char *buf = NULL; unsigned long len; @@ -1513,21 +1547,20 @@ static void do_io(struct io_thread_req *req) return; } - nsectors = req->length / req->sectorsize; + nsectors = desc->length / req->sectorsize; start = 0; do { - bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); + bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask); end = start; while((end < nsectors) && - (ubd_test_bit(end, (unsigned char *) - &req->sector_mask) == bit)) + (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit)) end++; off = req->offset + req->offsets[bit] + start * req->sectorsize; len = (end - start) * req->sectorsize; - if (req->buffer != NULL) - buf = &req->buffer[start * req->sectorsize]; + if (desc->buffer != NULL) + buf = &desc->buffer[start * req->sectorsize]; switch (req_op(req->req)) { case REQ_OP_READ: @@ -1567,7 +1600,8 @@ static void do_io(struct io_thread_req *req) start = end; } while(start < nsectors); - req->error = update_bitmap(req); + req->offset += len; + req->error = update_bitmap(req, desc); } /* Changed in 
start_io_thread, which is serialized by being called only @@ -1600,8 +1634,13 @@ int io_thread(void *arg) } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { + struct io_thread_req *req = (*io_req_buffer)[count]; + int i; + io_count++; - do_io((*io_req_buffer)[count]); + for (i = 0; !req->error && i < req->desc_cnt; i++) + do_io(req, &(req->io_desc[i])); + } written = 0; diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c index fc7f1e74670325..87ca4a47cd66e5 100644 --- a/arch/um/drivers/xterm.c +++ b/arch/um/drivers/xterm.c @@ -18,6 +18,7 @@ struct xterm_chan { int pid; int helper_pid; + int chan_fd; char *title; int device; int raw; @@ -33,6 +34,7 @@ static void *xterm_init(char *str, int device, const struct chan_opts *opts) return NULL; *data = ((struct xterm_chan) { .pid = -1, .helper_pid = -1, + .chan_fd = -1, .device = device, .title = opts->xterm_title, .raw = opts->raw } ); @@ -149,6 +151,7 @@ static int xterm_open(int input, int output, int primary, void *d, goto out_kill; } + data->chan_fd = fd; new = xterm_fd(fd, &data->helper_pid); if (new < 0) { err = new; @@ -206,6 +209,8 @@ static void xterm_close(int fd, void *d) os_kill_process(data->helper_pid, 0); data->helper_pid = -1; + if (data->chan_fd != -1) + os_close_file(data->chan_fd); os_close_file(fd); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 3d109ff3309b22..8dafc3f2add420 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -260,11 +260,6 @@ static void __time_travel_add_event(struct time_travel_event *e, struct time_travel_event *tmp; bool inserted = false; - if (WARN(time_travel_mode == TT_MODE_BASIC && - e != &time_travel_timer_event, - "only timer events can be handled in basic mode")) - return; - if (e->pending) return; diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c index d508310ee5e1ed..f1732c308c6156 100644 --- a/arch/um/os-Linux/irq.c +++ b/arch/um/os-Linux/irq.c @@ -48,7 +48,7 @@ int os_epoll_triggered(int 
index, int events) int os_event_mask(int irq_type) { if (irq_type == IRQ_READ) - return EPOLLIN | EPOLLPRI; + return EPOLLIN | EPOLLPRI | EPOLLERR | EPOLLHUP | EPOLLRDHUP; if (irq_type == IRQ_WRITE) return EPOLLOUT; return 0; diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index 1d7558dac75f3a..a3dd61521d240d 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -137,20 +137,13 @@ static inline int is_umdir_used(char *dir) { char pid[sizeof("nnnnnnnnn")], *end, *file; int dead, fd, p, n, err; - size_t filelen; + size_t filelen = strlen(dir) + sizeof("/pid") + 1; - err = asprintf(&file, "%s/pid", dir); - if (err < 0) - return 0; - - filelen = strlen(file); + file = malloc(filelen); + if (!file) + return -ENOMEM; - n = snprintf(file, filelen, "%s/pid", dir); - if (n >= filelen) { - printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n"); - err = -E2BIG; - goto out; - } + snprintf(file, filelen, "%s/pid", dir); dead = 0; fd = open(file, O_RDONLY); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fbf26e0f7a6a9b..3a5ecb1039bfb8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -18,6 +18,7 @@ config X86_32 select MODULES_USE_ELF_REL select OLD_SIGACTION select GENERIC_VDSO_32 + select ARCH_SPLIT_ARG64 config X86_64 def_bool y diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 870efeec8bdacc..94c6e6330e0435 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -73,10 +73,8 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, unsigned int nr) { if (likely(nr < IA32_NR_syscalls)) { - instrumentation_begin(); nr = array_index_nospec(nr, IA32_NR_syscalls); regs->ax = ia32_sys_call_table[nr](regs); - instrumentation_end(); } } @@ -91,8 +89,11 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) * or may not be necessary, but it matches the old asm behavior. 
*/ nr = (unsigned int)syscall_enter_from_user_mode(regs, nr); + instrumentation_begin(); do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); syscall_exit_to_user_mode(regs); } @@ -121,11 +122,12 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) res = get_user(*(u32 *)&regs->bp, (u32 __user __force *)(unsigned long)(u32)regs->sp); } - instrumentation_end(); if (res) { /* User code screwed up. */ regs->ax = -EFAULT; + + instrumentation_end(); syscall_exit_to_user_mode(regs); return false; } @@ -135,6 +137,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) /* Now this is just like a normal syscall. */ do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); syscall_exit_to_user_mode(regs); return true; } diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index ccd32877a3c414..c9a9fbf1655f3e 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -31,7 +31,7 @@ SYM_FUNC_START_NOALIGN(\name) .endif call \func - jmp .L_restore + jmp __thunk_restore SYM_FUNC_END(\name) _ASM_NOKPROBE(\name) .endm @@ -44,7 +44,7 @@ SYM_FUNC_END(\name) #endif #ifdef CONFIG_PREEMPTION -SYM_CODE_START_LOCAL_NOALIGN(.L_restore) +SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) popq %r11 popq %r10 popq %r9 @@ -56,6 +56,6 @@ SYM_CODE_START_LOCAL_NOALIGN(.L_restore) popq %rdi popq %rbp ret - _ASM_NOKPROBE(.L_restore) -SYM_CODE_END(.L_restore) + _ASM_NOKPROBE(__thunk_restore) +SYM_CODE_END(__thunk_restore) #endif diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index af457f8cb29dda..7d4d89fa8647a9 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -257,7 +257,8 @@ static struct event_constraint intel_icl_event_constraints[] = { INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ - INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ +
INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */ + INTEL_UEVENT_CONSTRAINT(0x14a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */ INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), @@ -5464,7 +5465,7 @@ __init int intel_pmu_init(void) mem_attr = icl_events_attrs; td_attr = icl_td_events_attrs; tsx_attr = icl_tsx_events_attrs; - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); x86_pmu.update_topdown_event = icl_update_topdown_event; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 8961653c5dd2ba..e2b0efcba1017b 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -919,7 +919,7 @@ static __always_inline bool get_lbr_predicted(u64 info) return !(info & LBR_INFO_MISPRED); } -static __always_inline bool get_lbr_cycles(u64 info) +static __always_inline u16 get_lbr_cycles(u64 info) { if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID)) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index e04d90af4c27cd..6375967a8244dc 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,8 @@ #include #include +int hyperv_init_cpuhp; + void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); @@ -312,6 +315,25 @@ static struct syscore_ops hv_syscore_ops = { .resume = hv_resume, }; +static void (* __initdata old_setup_percpu_clockev)(void); + +static void __init hv_stimer_setup_percpu_clockev(void) +{ + /* + * Ignore any errors in setting up stimer clockevents + * as we can run with the LAPIC timer as a fallback. 
+ */ + (void)hv_stimer_alloc(); + + /* + * Still register the LAPIC timer, because the direct-mode STIMER is + * not supported by old versions of Hyper-V. This also allows users + * to switch to LAPIC timer via /sys, if they want to. + */ + if (old_setup_percpu_clockev) + old_setup_percpu_clockev(); +} + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -390,10 +412,14 @@ void __init hyperv_init(void) wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); /* - * Ignore any errors in setting up stimer clockevents - * as we can run with the LAPIC timer as a fallback. + * hyperv_init() is called before LAPIC is initialized: see + * apic_intr_mode_init() -> x86_platform.apic_post_init() and + * apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER + * depends on LAPIC, so hv_stimer_alloc() should be called from + * x86_init.timers.setup_percpu_clockev. */ - (void)hv_stimer_alloc(); + old_setup_percpu_clockev = x86_init.timers.setup_percpu_clockev; + x86_init.timers.setup_percpu_clockev = hv_stimer_setup_percpu_clockev; hv_apic_init(); @@ -401,6 +427,7 @@ void __init hyperv_init(void) register_syscore_ops(&hv_syscore_ops); + hyperv_init_cpuhp = cpuhp; return; remove_cpuhp_state: diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 5208ba49c89a96..2c87350c1fb095 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -66,11 +66,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, if (!hv_hypercall_pg) goto do_native; - if (cpumask_empty(cpus)) - return; - local_irq_save(flags); + /* + * Only check the mask _after_ interrupt has been disabled to avoid the + * mask changing under our feet. 
+ */ + if (cpumask_empty(cpus)) { + local_irq_restore(flags); + return; + } + flush_pcpu = (struct hv_tlb_flush **) this_cpu_ptr(hyperv_pcpu_input_arg); diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 4e3099d9ae625f..57af25cb44f632 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -259,6 +259,7 @@ static inline u64 native_x2apic_icr_read(void) extern int x2apic_mode; extern int x2apic_phys; +extern void __init x2apic_set_max_apicid(u32 apicid); extern void __init check_x2apic(void); extern void x2apic_setup(void); static inline int x2apic_enabled(void) diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h index 86b63c7feab75d..86b2e0dcc4bfe0 100644 --- a/arch/x86/include/asm/cacheinfo.h +++ b/arch/x86/include/asm/cacheinfo.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_CACHEINFO_H #define _ASM_X86_CACHEINFO_H -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu); +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu); #endif /* _ASM_X86_CACHEINFO_H */ diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index dcd9503b109838..38f4936045ab63 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -16,14 +16,25 @@ * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It * disables preemption so be careful if you intend to use it for long periods * of time. - * If you intend to use the FPU in softirq you need to check first with + * If you intend to use the FPU in irq/softirq you need to check first with * irq_fpu_usable() if it is possible. 
*/ -extern void kernel_fpu_begin(void); + +/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */ +#define KFPU_387 _BITUL(0) /* 387 state will be initialized */ +#define KFPU_MXCSR _BITUL(1) /* MXCSR will be initialized */ + +extern void kernel_fpu_begin_mask(unsigned int kfpu_mask); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); extern void fpregs_mark_activate(void); +/* Code that is unaware of kernel_fpu_begin_mask() can use this */ +static inline void kernel_fpu_begin(void) +{ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +} + /* * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. * A context switch will (and softirq might) save CPU's FPU registers to diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index b2442eb0ac2f80..eb01c2618a9df7 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -616,6 +616,7 @@ DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication); #ifdef CONFIG_XEN_PV DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback); +DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER, exc_xen_unknown_trap); #endif /* Device interrupts common/spurious */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 5e658ba2654a7f..9abe842dbd843a 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -97,6 +97,7 @@ #define INTEL_FAM6_LAKEFIELD 0x8A #define INTEL_FAM6_ALDERLAKE 0x97 +#define INTEL_FAM6_ALDERLAKE_L 0x9A /* "Small Core" Processors (Atom) */ diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239b6..00000000000000 --- a/arch/x86/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index a0f147893a0414..fc25c88c7ff29c 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -177,7 +177,8 @@ enum 
mce_notifier_prios { MCE_PRIO_EXTLOG, MCE_PRIO_UC, MCE_PRIO_EARLY, - MCE_PRIO_CEC + MCE_PRIO_CEC, + MCE_PRIO_HIGHEST = MCE_PRIO_CEC }; struct notifier_block; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ffc289992d1b0f..30f76b96685799 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -74,6 +74,8 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) +extern int hyperv_init_cpuhp; + extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 0b4920a7238e38..e16cccdd04207c 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -86,7 +86,7 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} * think of extending them - you will be slapped with a stinking trout or a frozen * shark will reach you, wherever you are! You've been warned. */ -static inline unsigned long long notrace __rdmsr(unsigned int msr) +static __always_inline unsigned long long __rdmsr(unsigned int msr) { DECLARE_ARGS(val, low, high); @@ -98,7 +98,7 @@ static inline unsigned long long notrace __rdmsr(unsigned int msr) return EAX_EDX_VAL(val, low, high); } -static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) +static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) { asm volatile("1: wrmsr\n" "2:\n" diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index f4234575f3fdb4..1f6caceccbb024 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -110,6 +110,8 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +extern unsigned int __max_die_per_package; + #ifdef CONFIG_SMP #define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) #define 
topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) @@ -118,8 +120,6 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) -extern unsigned int __max_die_per_package; - static inline int topology_max_die_per_package(void) { return __max_die_per_package; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b3eef1d5c90377..113f6ca7b82849 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1841,20 +1841,22 @@ static __init void try_to_enable_x2apic(int remap_mode) return; if (remap_mode != IRQ_REMAP_X2APIC_MODE) { - /* IR is required if there is APIC ID > 255 even when running - * under KVM + /* + * Using X2APIC without IR is not architecturally supported + * on bare metal but may be supported in guests. */ - if (max_physical_apicid > 255 || - !x86_init.hyper.x2apic_available()) { + if (!x86_init.hyper.x2apic_available()) { pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); x2apic_disable(); return; } /* - * without IR all CPUs can be addressed by IOAPIC/MSI - * only in physical mode + * Without IR, all CPUs can be addressed by IOAPIC/MSI only + * in physical mode, and CPUs with an APIC ID that cannot + * be addressed must not be brought online.
*/ + x2apic_set_max_apicid(255); x2apic_phys = 1; } x2apic_enable(); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index bc9693841353c8..e14eae6d6ea71a 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -8,6 +8,12 @@ int x2apic_phys; static struct apic apic_x2apic_phys; +static u32 x2apic_max_apicid __ro_after_init; + +void __init x2apic_set_max_apicid(u32 apicid) +{ + x2apic_max_apicid = apicid; +} static int __init set_x2apic_phys_mode(char *arg) { @@ -98,6 +104,9 @@ static int x2apic_phys_probe(void) /* Common x2apic functions, also used by x2apic_cluster */ int x2apic_apic_id_valid(u32 apicid) { + if (x2apic_max_apicid && apicid > x2apic_max_apicid) + return 0; + return 1; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6062ce586b9599..a2551b10780c67 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -330,7 +330,6 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c) */ static void amd_get_topology(struct cpuinfo_x86 *c) { - u8 node_id; int cpu = smp_processor_id(); /* get information required for multi-node processors */ @@ -340,7 +339,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - node_id = ecx & 0xff; + c->cpu_die_id = ecx & 0xff; if (c->x86 == 0x15) c->cu_id = ebx & 0xff; @@ -360,15 +359,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c) if (!err) c->x86_coreid_bits = get_count_order(c->x86_max_cores); - cacheinfo_amd_init_llc_id(c, cpu, node_id); + cacheinfo_amd_init_llc_id(c, cpu); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - node_id = value & 7; + c->cpu_die_id = value & 7; - per_cpu(cpu_llc_id, cpu) = node_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id; } else return; @@ -393,7 +392,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = 
c->initial_apicid >> bits; /* use socket ID also for last level cache */ - per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id; } static void amd_detect_ppin(struct cpuinfo_x86 *c) @@ -570,12 +569,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) u32 ecx; ecx = cpuid_ecx(0x8000001e); - nodes_per_socket = ((ecx >> 8) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes_per_socket = ((value >> 3) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; } if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 57074cf3ad7c1a..f9ac682e75e78d 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -646,7 +646,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) return i; } -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu) { /* * We may have multiple LLCs if L3 caches exist, so check if we @@ -657,7 +657,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) if (c->x86 < 0x17) { /* LLC is at the node level. */ - per_cpu(cpu_llc_id, cpu) = node_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id; } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* * LLC is at the core complex level. 
@@ -684,7 +684,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) } } -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu) { /* * We may have multiple LLCs if L3 caches exist, so check if we diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index ac6c30e5801da2..dc0840aae26c14 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -65,7 +65,6 @@ static void hygon_get_topology_early(struct cpuinfo_x86 *c) */ static void hygon_get_topology(struct cpuinfo_x86 *c) { - u8 node_id; int cpu = smp_processor_id(); /* get information required for multi-node processors */ @@ -75,7 +74,7 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - node_id = ecx & 0xff; + c->cpu_die_id = ecx & 0xff; c->cpu_core_id = ebx & 0xff; @@ -93,14 +92,14 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) /* Socket ID is ApicId[6] for these processors. 
*/ c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; - cacheinfo_hygon_init_llc_id(c, cpu, node_id); + cacheinfo_hygon_init_llc_id(c, cpu); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - node_id = value & 7; + c->cpu_die_id = value & 7; - per_cpu(cpu_llc_id, cpu) = node_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id; } else return; @@ -123,7 +122,7 @@ static void hygon_detect_cmp(struct cpuinfo_x86 *c) /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; /* use socket ID also for last level cache */ - per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id; } static void srat_detect_node(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 32b7099e351119..311688202ea51b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -162,7 +162,8 @@ EXPORT_SYMBOL_GPL(mce_log); void mce_register_decode_chain(struct notifier_block *nb) { - if (WARN_ON(nb->priority > MCE_PRIO_MCELOG && nb->priority < MCE_PRIO_EDAC)) + if (WARN_ON(nb->priority < MCE_PRIO_LOWEST || + nb->priority > MCE_PRIO_HIGHEST)) return; blocking_notifier_chain_register(&x86_mce_decoder_chain, nb); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 05ef1f4550cbd4..6cc50ab07bded7 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -135,14 +135,32 @@ static void hv_machine_shutdown(void) { if (kexec_in_progress && hv_kexec_handler) hv_kexec_handler(); + + /* + * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor + * corrupts the old VP Assist Pages and can crash the kexec kernel. + */ + if (kexec_in_progress && hyperv_init_cpuhp > 0) + cpuhp_remove_state(hyperv_init_cpuhp); + + /* The function calls stop_other_cpus(). 
*/ native_machine_shutdown(); + + /* Disable the hypercall page when there is only 1 active CPU. */ + if (kexec_in_progress) + hyperv_cleanup(); } static void hv_machine_crash_shutdown(struct pt_regs *regs) { if (hv_crash_handler) hv_crash_handler(regs); + + /* The function calls crash_smp_send_stop(). */ native_machine_crash_shutdown(regs); + + /* Disable the hypercall page when there is only 1 active CPU. */ + hyperv_cleanup(); } #endif /* CONFIG_KEXEC_CORE */ #endif /* CONFIG_HYPERV */ diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 23ad8e953dfb15..a29997e6cf9e6c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, *repeat = 0; *uniform = 1; - /* Make end inclusive instead of exclusive */ - end--; - prev_match = MTRR_TYPE_INVALID; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) int repeat; u64 partial_end; + /* Make end inclusive instead of exclusive */ + end--; + if (!mtrr_state_set) return MTRR_TYPE_INVALID; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index f3418428682b1a..5a59e3315b3403 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -525,89 +525,70 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp) kfree(rdtgrp); } -struct task_move_callback { - struct callback_head work; - struct rdtgroup *rdtgrp; -}; - -static void move_myself(struct callback_head *head) +static void _update_task_closid_rmid(void *task) { - struct task_move_callback *callback; - struct rdtgroup *rdtgrp; - - callback = container_of(head, struct task_move_callback, work); - rdtgrp = callback->rdtgrp; - /* - * If resource group was deleted before this task work callback - * was invoked, then assign the task 
to root group and free the - * resource group. + * If the task is still current on this CPU, update PQR_ASSOC MSR. + * Otherwise, the MSR is updated when the task is scheduled in. */ - if (atomic_dec_and_test(&rdtgrp->waitcount) && - (rdtgrp->flags & RDT_DELETED)) { - current->closid = 0; - current->rmid = 0; - rdtgroup_remove(rdtgrp); - } - - if (unlikely(current->flags & PF_EXITING)) - goto out; - - preempt_disable(); - /* update PQR_ASSOC MSR to make resource group go into effect */ - resctrl_sched_in(); - preempt_enable(); + if (task == current) + resctrl_sched_in(); +} -out: - kfree(callback); +static void update_task_closid_rmid(struct task_struct *t) +{ + if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) + smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); + else + _update_task_closid_rmid(t); } static int __rdtgroup_move_task(struct task_struct *tsk, struct rdtgroup *rdtgrp) { - struct task_move_callback *callback; - int ret; - - callback = kzalloc(sizeof(*callback), GFP_KERNEL); - if (!callback) - return -ENOMEM; - callback->work.func = move_myself; - callback->rdtgrp = rdtgrp; + /* If the task is already in rdtgrp, no need to move the task. */ + if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid && + tsk->rmid == rdtgrp->mon.rmid) || + (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid && + tsk->closid == rdtgrp->mon.parent->closid)) + return 0; /* - * Take a refcount, so rdtgrp cannot be freed before the - * callback has been invoked. + * Set the task's closid/rmid before the PQR_ASSOC MSR can be + * updated by them. + * + * For ctrl_mon groups, move both closid and rmid. + * For monitor groups, can move the tasks only from + * their parent CTRL group. */ - atomic_inc(&rdtgrp->waitcount); - ret = task_work_add(tsk, &callback->work, TWA_RESUME); - if (ret) { - /* - * Task is exiting. Drop the refcount and free the callback. 
- * No need to check the refcount as the group cannot be - * deleted before the write function unlocks rdtgroup_mutex. - */ - atomic_dec(&rdtgrp->waitcount); - kfree(callback); - rdt_last_cmd_puts("Task exited\n"); - } else { - /* - * For ctrl_mon groups move both closid and rmid. - * For monitor groups, can move the tasks only from - * their parent CTRL group. - */ - if (rdtgrp->type == RDTCTRL_GROUP) { - tsk->closid = rdtgrp->closid; + + if (rdtgrp->type == RDTCTRL_GROUP) { + tsk->closid = rdtgrp->closid; + tsk->rmid = rdtgrp->mon.rmid; + } else if (rdtgrp->type == RDTMON_GROUP) { + if (rdtgrp->mon.parent->closid == tsk->closid) { tsk->rmid = rdtgrp->mon.rmid; - } else if (rdtgrp->type == RDTMON_GROUP) { - if (rdtgrp->mon.parent->closid == tsk->closid) { - tsk->rmid = rdtgrp->mon.rmid; - } else { - rdt_last_cmd_puts("Can't move task to different control group\n"); - ret = -EINVAL; - } + } else { + rdt_last_cmd_puts("Can't move task to different control group\n"); + return -EINVAL; } } - return ret; + + /* + * Ensure the task's closid and rmid are written before determining if + * the task is current that will decide if it will be interrupted. + */ + barrier(); + + /* + * By now, the task's closid and rmid are set. If the task is current + * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource + * group go into effect. If the task is not current, the MSR will be + * updated when the task is scheduled in. 
+ */ + update_task_closid_rmid(tsk); + + return 0; } static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index d3a0791bc052ad..91288da2959951 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -25,10 +25,10 @@ #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -#ifdef CONFIG_SMP unsigned int __max_die_per_package __read_mostly = 1; EXPORT_SYMBOL(__max_die_per_package); +#ifdef CONFIG_SMP /* * Check if given CPUID extended toplogy "leaf" is implemented */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index eb86a2b831b15a..571220ac8beaa7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -121,7 +121,7 @@ int copy_fpregs_to_fpstate(struct fpu *fpu) } EXPORT_SYMBOL(copy_fpregs_to_fpstate); -void kernel_fpu_begin(void) +void kernel_fpu_begin_mask(unsigned int kfpu_mask) { preempt_disable(); @@ -141,13 +141,14 @@ void kernel_fpu_begin(void) } __cpu_invalidate_fpregs_state(); - if (boot_cpu_has(X86_FEATURE_XMM)) + /* Put sane initial values into the control registers. */ + if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM)) ldmxcsr(MXCSR_DEFAULT); - if (boot_cpu_has(X86_FEATURE_FPU)) + if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU)) asm volatile ("fninit"); } -EXPORT_SYMBOL_GPL(kernel_fpu_begin); +EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); void kernel_fpu_end(void) { diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 547c7abb39f513..39f7d8c3c064bb 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -937,6 +937,11 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * So clear it by resetting the current kprobe: */ regs->flags &= ~X86_EFLAGS_TF; + /* + * Since the single step (trap) has been cancelled, + * we need to restore BTF here. 
+ */ + restore_btf(); /* * If the TF flag was set before the kprobe hit, diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 7d04b356d44d33..cdc04d09124233 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0xe4: case 0xe5: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (u64)insn->immediate.value << 16; + *exitinfo |= (u8)insn->immediate.value << 16; break; /* OUT immediate opcodes */ case 0xe6: case 0xe7: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (u64)insn->immediate.value << 16; + *exitinfo |= (u8)insn->immediate.value << 16; break; /* IN register opcodes */ diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 0bd1a0fc587e0f..84c1821819afb8 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -225,7 +225,7 @@ static inline u64 sev_es_rd_ghcb_msr(void) return __rdmsr(MSR_AMD64_SEV_ES_GHCB); } -static inline void sev_es_wr_ghcb_msr(u64 val) +static __always_inline void sev_es_wr_ghcb_msr(u64 val) { u32 low, high; @@ -286,6 +286,12 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; + /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ + if (!user_mode(ctxt->regs) && !access_ok(target, size)) { + memcpy(dst, buf, size); + return ES_OK; + } + switch (size) { case 1: memcpy(&d1, buf, 1); @@ -335,6 +341,12 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; + /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ + if (!user_mode(ctxt->regs) && !access_ok(s, size)) { + memcpy(buf, src, size); + return ES_OK; + } + switch (size) { case 1: if (get_user(d1, s)) diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index ae64f98ec2ab6a..4c09ba1102047c 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -93,6 +93,7 @@ static struct mm_struct 
tboot_mm = { .pgd = swapper_pg_dir, .mm_users = ATOMIC_INIT(2), .mm_count = ATOMIC_INIT(1), + .write_protect_seq = SEQCNT_ZERO(tboot_mm.write_protect_seq), MMAP_LOCK_INITIALIZER(init_mm) .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index e19df6cde35d1d..170c94ec00685d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -299,11 +299,12 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check) local_irq_enable(); if (handle_user_split_lock(regs, error_code)) - return; + goto out; do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs, error_code, BUS_ADRALN, NULL); +out: local_irq_disable(); } diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index f7a6e8f83783cd..dc921d76e42e8b 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -264,6 +264,20 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu) return x86_stepping(best->eax); } +static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu) +{ + return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)); +} + +static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu) +{ + return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)); +} + static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu) { return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 9c4a9c8e43d90e..581925e476d6c5 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -49,7 +49,7 @@ static inline u64 rsvd_bits(int s, int e) if (e < s) return 0; - return ((1ULL << (e - s + 1)) - 1) << s; + return ((2ULL << (e - s)) - 1) << s; } void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask); diff --git 
a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 7a6ae9e90bd708..52f36c8790862b 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3485,16 +3485,16 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. */ -static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) +static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { struct kvm_shadow_walk_iterator iterator; - int leaf = vcpu->arch.mmu->root_level; + int leaf = -1; u64 spte; - walk_shadow_page_lockless_begin(vcpu); - for (shadow_walk_init(&iterator, vcpu, addr); + for (shadow_walk_init(&iterator, vcpu, addr), + *root_level = iterator.level; shadow_walk_okay(&iterator); __shadow_walk_next(&iterator, spte)) { leaf = iterator.level; @@ -3504,7 +3504,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) if (!is_shadow_present_pte(spte)) break; - } walk_shadow_page_lockless_end(vcpu); @@ -3517,9 +3516,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL]; struct rsvd_bits_validate *rsvd_check; - int root = vcpu->arch.mmu->shadow_root_level; - int leaf; - int level; + int root, leaf, level; bool reserved = false; if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) { @@ -3528,9 +3525,14 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) } if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) - leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes); + leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root); else - leaf = get_walk(vcpu, addr, sptes); + leaf = get_walk(vcpu, addr, sptes, &root); + + if (unlikely(leaf < 0)) { + *sptep = 0ull; + return reserved; + } rsvd_check = &vcpu->arch.mmu->shadow_zero_check; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 84c8f06bec261a..b9265a585ea3cd 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ 
b/arch/x86/kvm/mmu/tdp_mmu.c @@ -42,7 +42,48 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); } -#define for_each_tdp_mmu_root(_kvm, _root) \ +static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root) +{ + if (kvm_mmu_put_root(kvm, root)) + kvm_tdp_mmu_free_root(kvm, root); +} + +static inline bool tdp_mmu_next_root_valid(struct kvm *kvm, + struct kvm_mmu_page *root) +{ + lockdep_assert_held(&kvm->mmu_lock); + + if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link)) + return false; + + kvm_mmu_get_root(kvm, root); + return true; + +} + +static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, + struct kvm_mmu_page *root) +{ + struct kvm_mmu_page *next_root; + + next_root = list_next_entry(root, link); + tdp_mmu_put_root(kvm, root); + return next_root; +} + +/* + * Note: this iterator gets and puts references to the roots it iterates over. + * This makes it safe to release the MMU lock and yield within the loop, but + * if exiting the loop early, the caller must drop the reference to the most + * recent root. (Unless keeping a live reference is desirable.) + */ +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \ + for (_root = list_first_entry(&_kvm->arch.tdp_mmu_roots, \ + typeof(*_root), link); \ + tdp_mmu_next_root_valid(_kvm, _root); \ + _root = tdp_mmu_next_root(_kvm, _root)) + +#define for_each_tdp_mmu_root(_kvm, _root) \ list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link) bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa) @@ -439,18 +480,9 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end) struct kvm_mmu_page *root; bool flush = false; - for_each_tdp_mmu_root(kvm, root) { - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. 
- */ - kvm_mmu_get_root(kvm, root); - + for_each_tdp_mmu_root_yield_safe(kvm, root) flush |= zap_gfn_range(kvm, root, start, end, true); - kvm_mmu_put_root(kvm, root); - } - return flush; } @@ -609,13 +641,7 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start, int ret = 0; int as_id; - for_each_tdp_mmu_root(kvm, root) { - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - + for_each_tdp_mmu_root_yield_safe(kvm, root) { as_id = kvm_mmu_page_as_id(root); slots = __kvm_memslots(kvm, as_id); kvm_for_each_memslot(memslot, slots) { @@ -637,8 +663,6 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start, ret |= handler(kvm, memslot, root, gfn_start, gfn_end, data); } - - kvm_mmu_put_root(kvm, root); } return ret; @@ -826,21 +850,13 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages, min_level); - - kvm_mmu_put_root(kvm, root); } return spte_set; @@ -894,21 +910,13 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot) int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. 
- */ - kvm_mmu_get_root(kvm, root); - spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } return spte_set; @@ -1017,21 +1025,13 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot) int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } return spte_set; } @@ -1077,21 +1077,13 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, struct kvm_mmu_page *root; int root_as_id; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - zap_collapsible_spte_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } } @@ -1148,12 +1140,15 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. 
*/ -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, + int *root_level) { struct tdp_iter iter; struct kvm_mmu *mmu = vcpu->arch.mmu; - int leaf = vcpu->arch.mmu->shadow_root_level; gfn_t gfn = addr >> PAGE_SHIFT; + int leaf = -1; + + *root_level = vcpu->arch.mmu->shadow_root_level; tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 556e065503f69b..cbbdbadd1526ff 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes); +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, + int *root_level); + #endif /* __KVM_X86_MMU_TDP_MMU_H */ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 9e4c226dbf7d9d..65e40acde71aac 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -199,6 +199,10 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + + if (WARN_ON(!is_guest_mode(vcpu))) + return true; + if (!nested_svm_vmrun_msrpm(svm)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = @@ -595,6 +599,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->nested.vmcb12_gpa = 0; WARN_ON_ONCE(svm->nested.nested_run_pending); + kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu); + /* in case we halted in L2 */ svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 566f4d18185b1d..5c9630c3f6ba1b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1127,9 +1127,6 @@ void 
sev_vm_destroy(struct kvm *kvm) int __init sev_hardware_setup(void) { - struct sev_user_data_status *status; - int rc; - /* Maximum number of encrypted guests supported simultaneously */ max_sev_asid = cpuid_ecx(0x8000001F); @@ -1148,26 +1145,9 @@ int __init sev_hardware_setup(void) if (!sev_reclaim_asid_bitmap) return 1; - status = kmalloc(sizeof(*status), GFP_KERNEL); - if (!status) - return 1; - - /* - * Check SEV platform status. - * - * PLATFORM_STATUS can be called in any state, if we failed to query - * the PLATFORM status then either PSP firmware does not support SEV - * feature or SEV firmware is dead. - */ - rc = sev_platform_status(status, NULL); - if (rc) - goto err; - pr_info("SEV supported\n"); -err: - kfree(status); - return rc; + return 0; } void sev_hardware_teardown(void) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index da7eb4aaf44f85..94b0cb83304512 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2543,10 +2543,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; msr_info->data = svm->spec_ctrl; @@ -2630,10 +2627,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; if (kvm_spec_ctrl_test_value(data)) @@ -2658,12 +2652,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && - 
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) + !guest_has_pred_cmd_msr(vcpu)) return 1; if (data & ~PRED_CMD_IBPB) return 1; - if (!boot_cpu_has(X86_FEATURE_AMD_IBPB)) + if (!boot_cpu_has(X86_FEATURE_IBPB)) return 1; if (!data) break; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 89af692deb7ef3..f3eca452677817 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3123,13 +3123,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) return 0; } -static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) +static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_host_map *map; - struct page *page; - u64 hpa; /* * hv_evmcs may end up being not mapped after migration (when @@ -3152,6 +3148,17 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) } } + return true; +} + +static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_host_map *map; + struct page *page; + u64 hpa; + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { /* * Translate L1 physical address to host physical @@ -3220,6 +3227,18 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS); else exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS); + + return true; +} + +static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu) +{ + if (!nested_get_evmcs_page(vcpu)) + return false; + + if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu)) + return false; + return true; } @@ -4416,6 +4435,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, /* trying to cancel vmlaunch/vmresume is a bug */ WARN_ON_ONCE(vmx->nested.nested_run_pending); + kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); + /* Service the TLB flush request for L2 before 
switching to L1. */ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) kvm_vcpu_flush_tlb_current(vcpu); @@ -6049,11 +6070,14 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu)) { sync_vmcs02_to_vmcs12(vcpu, vmcs12); sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); - } else if (!vmx->nested.need_vmcs12_to_shadow_sync) { - if (vmx->nested.hv_evmcs) - copy_enlightened_to_vmcs12(vmx); - else if (enable_shadow_vmcs) - copy_shadow_to_vmcs12(vmx); + } else { + copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); + if (!vmx->nested.need_vmcs12_to_shadow_sync) { + if (vmx->nested.hv_evmcs) + copy_enlightened_to_vmcs12(vmx); + else if (enable_shadow_vmcs) + copy_shadow_to_vmcs12(vmx); + } } BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE); @@ -6573,7 +6597,7 @@ struct kvm_x86_nested_ops vmx_nested_ops = { .hv_timer_pending = nested_vmx_preemption_timer_pending, .get_state = vmx_get_nested_state, .set_state = vmx_set_nested_state, - .get_nested_state_pages = nested_get_vmcs12_pages, + .get_nested_state_pages = vmx_get_nested_state_pages, .write_log_dirty = nested_vmx_write_pml_buffer, .enable_evmcs = nested_enable_evmcs, .get_evmcs_version = nested_get_evmcs_version, diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index a886a47daebdad..cdf5f34518f43b 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -29,7 +29,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = { [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, - [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES }, + [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES }, }; /* mapping between fixed pmc index and intel_arch_events array */ @@ -345,7 +345,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, x86_pmu.num_counters_gp); + 
eax.split.bit_width = min_t(int, eax.split.bit_width, x86_pmu.bit_width_gp); pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; + eax.split.mask_length = min_t(int, eax.split.mask_length, x86_pmu.events_mask_len); pmu->available_event_types = ~entry->ebx & ((1ull << eax.split.mask_length) - 1); @@ -355,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, x86_pmu.num_counters_fixed); + edx.split.bit_width_fixed = min_t(int, + edx.split.bit_width_fixed, x86_pmu.bit_width_fixed); pmu->counter_bitmask[KVM_PMC_FIXED] = ((u64)1 << edx.split.bit_width_fixed) - 1; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 47b8357b97517e..c01aac2bac37cb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1826,7 +1826,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; msr_info->data = to_vmx(vcpu)->spec_ctrl; @@ -2028,7 +2028,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; if (kvm_spec_ctrl_test_value(data)) @@ -2063,12 +2063,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) goto find_uret_msr; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_has_pred_cmd_msr(vcpu)) return 1; if (data & ~PRED_CMD_IBPB) return 1; - if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (!boot_cpu_has(X86_FEATURE_IBPB)) return 1; if (!data) break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e545a8a613b197..0a302685e4d627 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -105,6 +105,7 @@ 
static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); +static void process_smi(struct kvm_vcpu *vcpu); static void enter_smm(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); static void store_regs(struct kvm_vcpu *vcpu); @@ -4199,6 +4200,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, { process_nmi(vcpu); + if (kvm_check_request(KVM_REQ_SMI, vcpu)) + process_smi(vcpu); + /* * In guest mode, payload delivery should be deferred, * so that the L1 hypervisor can intercept #PF before diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index 4321fa02e18df0..419365c48b2ada 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -26,6 +26,16 @@ #include #include +/* + * Use KFPU_387. MMX instructions are not affected by MXCSR, + * but both AMD and Intel documentation states that even integer MMX + * operations will result in #MF if an exception is pending in FCW. + * + * EMMS is not needed afterwards because, after calling kernel_fpu_end(), + * any subsequent user of the 387 stack will reinitialize it using + * KFPU_387. + */ + void *_mmx_memcpy(void *to, const void *from, size_t len) { void *p; @@ -37,7 +47,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) p = to; i = len >> 6; /* len/64 */ - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( "1: prefetch (%0)\n" /* This set is 28 bytes */ @@ -127,7 +137,7 @@ static void fast_clear_page(void *page) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -160,7 +170,7 @@ static void fast_copy_page(void *to, void *from) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); /* * maybe the prefetch stuff can go before the expensive fnsave... 
@@ -247,7 +257,7 @@ static void fast_clear_page(void *page) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -282,7 +292,7 @@ static void fast_copy_page(void *to, void *from) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( "1: prefetch (%0)\n" diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index fe7a12599d8eb8..968d7005f4a724 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -62,6 +62,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, unsigned long addr, unsigned long end) { unsigned long next; + int result; for (; addr < end; addr = next) { p4d_t *p4d = p4d_page + p4d_index(addr); @@ -73,13 +74,20 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, if (p4d_present(*p4d)) { pud = pud_offset(p4d, 0); - ident_pud_init(info, pud, addr, next); + result = ident_pud_init(info, pud, addr, next); + if (result) + return result; + continue; } pud = (pud_t *)info->alloc_pgt_page(info->context); if (!pud) return -ENOMEM; - ident_pud_init(info, pud, addr, next); + + result = ident_pud_init(info, pud, addr, next); + if (result) + return result; + set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag)); } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index dfd82f51ba66bc..f6a9e2e3664259 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -829,6 +829,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) } free_page((unsigned long)pmd_sv); + + pgtable_pmd_page_dtor(virt_to_page(pmd)); free_page((unsigned long)pmd); return 1; diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 9e87ab010c82bb..ec50b7423a4c86 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -188,6 +188,8 @@ static int xen_cpu_dead_hvm(unsigned int cpu) return 0; } +static bool no_vector_callback __initdata; + static void __init 
xen_hvm_guest_init(void) { if (xen_pv_domain()) @@ -207,7 +209,7 @@ static void __init xen_hvm_guest_init(void) xen_panic_handler_init(); - if (xen_feature(XENFEAT_hvm_callback_vector)) + if (!no_vector_callback && xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; xen_hvm_smp_init(); @@ -233,6 +235,13 @@ static __init int xen_parse_nopv(char *arg) } early_param("xen_nopv", xen_parse_nopv); +static __init int xen_parse_no_vector_callback(char *arg) +{ + no_vector_callback = true; + return 0; +} +early_param("xen_no_vector_callback", xen_parse_no_vector_callback); + bool __init xen_hvm_need_lapic(void) { if (xen_pv_domain()) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 4409306364dc3c..9a5a50cdaab596 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -583,6 +583,13 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug) exc_debug(regs); } +DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) +{ + /* This should never happen and there is no way to handle it. */ + pr_err("Unknown trap in Xen PV mode."); + BUG(); +} + struct trap_array_entry { void (*orig)(void); void (*xen)(void); @@ -631,6 +638,7 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist) { unsigned int nr; bool ist_okay = false; + bool found = false; /* * Replace trap handler addresses by Xen specific ones. 
@@ -645,6 +653,7 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist) if (*addr == entry->orig) { *addr = entry->xen; ist_okay = entry->ist_okay; + found = true; break; } } @@ -655,9 +664,13 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist) nr = (*addr - (void *)early_idt_handler_array[0]) / EARLY_IDT_HANDLER_SIZE; *addr = (void *)xen_early_idt_handler_array[nr]; + found = true; } - if (WARN_ON(ist != 0 && !ist_okay)) + if (!found) + *addr = (void *)xen_asm_exc_xen_unknown_trap; + + if (WARN_ON(found && ist != 0 && !ist_okay)) return false; return true; diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index f5e7db4f82abb6..6ff3c887e0b995 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -33,9 +33,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) int cpu; native_smp_prepare_cpus(max_cpus); - WARN_ON(xen_smp_intr_init(0)); - xen_init_lock_cpu(0); + if (xen_have_vector_callback) { + WARN_ON(xen_smp_intr_init(0)); + xen_init_lock_cpu(0); + } for_each_possible_cpu(cpu) { if (cpu == 0) @@ -50,9 +52,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) static void xen_hvm_cpu_die(unsigned int cpu) { if (common_cpu_die(cpu) == 0) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); + if (xen_have_vector_callback) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); + } } } #else @@ -64,14 +68,19 @@ static void xen_hvm_cpu_die(unsigned int cpu) void __init xen_hvm_smp_init(void) { - if (!xen_have_vector_callback) + smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; + smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; + smp_ops.smp_cpus_done = xen_smp_cpus_done; + smp_ops.cpu_die = xen_hvm_cpu_die; + + if (!xen_have_vector_callback) { +#ifdef CONFIG_PARAVIRT_SPINLOCKS + nopvspin = true; +#endif return; + } - smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_send_reschedule = 
xen_smp_send_reschedule; - smp_ops.cpu_die = xen_hvm_cpu_die; smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; - smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; - smp_ops.smp_cpus_done = xen_smp_cpus_done; } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 1cb0e84b91610a..53cf8aa35032d6 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -178,6 +178,7 @@ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION xen_pv_trap entry_INT80_compat #endif +xen_pv_trap asm_exc_xen_unknown_trap xen_pv_trap asm_exc_xen_hypervisor_callback __INIT diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index c59c42a1221a86..adefb1636f7aec 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -2,7 +2,6 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += param.h generic-y += qrwlock.h diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 9e81d1052091fc..9e4eb0fc1c16e7 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6332,13 +6332,13 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U); + bfqd->word_depths[0][0] = max(bt->sb.depth >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U); + bfqd->word_depths[0][1] = max((bt->sb.depth * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -6348,9 +6348,9 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * shortage. 
*/ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U); + bfqd->word_depths[1][0] = max((bt->sb.depth * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U); + bfqd->word_depths[1][1] = max((bt->sb.depth * 6) >> 4, 1U); for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) diff --git a/block/blk-core.c b/block/blk-core.c index 2db8bda43b6e6d..2d53e2ff48ff88 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -424,11 +425,11 @@ EXPORT_SYMBOL(blk_cleanup_queue); /** * blk_queue_enter() - try to increase q->q_usage_counter * @q: request queue pointer - * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT + * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM */ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { - const bool pm = flags & BLK_MQ_REQ_PREEMPT; + const bool pm = flags & BLK_MQ_REQ_PM; while (true) { bool success = false; @@ -440,7 +441,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) * responsible for ensuring that that counter is * globally visible before the queue is unfrozen. 
*/ - if (pm || !blk_queue_pm_only(q)) { + if ((pm && queue_rpm_status(q) != RPM_SUSPENDED) || + !blk_queue_pm_only(q)) { success = true; } else { percpu_ref_put(&q->q_usage_counter); @@ -465,8 +467,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) wait_event(q->mq_freeze_wq, (!q->mq_freeze_depth && - (pm || (blk_pm_request_resume(q), - !blk_queue_pm_only(q)))) || + blk_pm_resume_queue(pm, q)) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; @@ -630,7 +631,7 @@ struct request *blk_get_request(struct request_queue *q, unsigned int op, struct request *req; WARN_ON_ONCE(op & REQ_NOWAIT); - WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT)); + WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM)); req = blk_mq_alloc_request(q, op, flags); if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index bbe86d1199dc5b..7e963b457f2ec6 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2525,8 +2525,8 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) bool use_debt, ioc_locked; unsigned long flags; - /* bypass IOs if disabled or for root cgroup */ - if (!ioc->enabled || !iocg->level) + /* bypass IOs if disabled, still initializing, or for root cgroup */ + if (!ioc->enabled || !iocg || !iocg->level) return; /* calculate the absolute vtime cost */ @@ -2653,14 +2653,14 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) { struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg); - struct ioc *ioc = iocg->ioc; + struct ioc *ioc = rqos_to_ioc(rqos); sector_t bio_end = bio_end_sector(bio); struct ioc_now now; u64 vtime, abs_cost, cost; unsigned long flags; - /* bypass if disabled or for root cgroup */ - if (!ioc->enabled || !iocg->level) + /* bypass if disabled, still initializing, or for root cgroup */ + if (!ioc->enabled || !iocg || !iocg->level) return; abs_cost = calc_vtime_cost(bio, iocg, true); @@ -2837,6 +2837,12 @@ 
static int blk_iocost_init(struct request_queue *q) ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); + /* + * rqos must be added before activation to allow iocg_pd_init() to + * lookup the ioc from q. This means that the rqos methods may get + * called before policy activation completion, can't assume that the + * target bio has an iocg associated and need to test for NULL iocg. + */ rq_qos_add(q, rqos); ret = blkcg_activate_policy(q, &blkcg_policy_iocost); if (ret) { diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3094542e12ae0f..4de03da9a624b8 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -129,6 +129,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(PCI_P2PDMA), QUEUE_FLAG_NAME(ZONE_RESETALL), QUEUE_FLAG_NAME(RQ_ALLOC_TIME), + QUEUE_FLAG_NAME(NOWAIT), }; #undef QUEUE_FLAG_NAME @@ -245,6 +246,7 @@ static const char *const hctx_flag_name[] = { HCTX_FLAG_NAME(BLOCKING), HCTX_FLAG_NAME(NO_SCHED), HCTX_FLAG_NAME(STACKING), + HCTX_FLAG_NAME(TAG_HCTX_SHARED), }; #undef HCTX_FLAG_NAME @@ -297,7 +299,6 @@ static const char *const rqf_name[] = { RQF_NAME(MIXED_MERGE), RQF_NAME(MQ_INFLIGHT), RQF_NAME(DONTPREP), - RQF_NAME(PREEMPT), RQF_NAME(FAILED), RQF_NAME(QUIET), RQF_NAME(ELVPRIV), diff --git a/block/blk-mq.c b/block/blk-mq.c index 55bcee5dc0320c..2a1eff60c79750 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -292,8 +292,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->mq_hctx = data->hctx; rq->rq_flags = 0; rq->cmd_flags = data->cmd_flags; - if (data->flags & BLK_MQ_REQ_PREEMPT) - rq->rq_flags |= RQF_PREEMPT; + if (data->flags & BLK_MQ_REQ_PM) + rq->rq_flags |= RQF_PM; if (blk_queue_io_stat(data->q)) rq->rq_flags |= RQF_IO_STAT; INIT_LIST_HEAD(&rq->queuelist); diff --git a/block/blk-mq.h b/block/blk-mq.h index a52703c98b7736..d2359f7cfd5f2e 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -303,7 +303,7 @@ static inline bool hctx_may_queue(struct 
blk_mq_hw_ctx *hctx, struct request_queue *q = hctx->queue; struct blk_mq_tag_set *set = q->tag_set; - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &q->queue_flags)) + if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) return true; users = atomic_read(&set->active_queues_shared_sbitmap); } else { diff --git a/block/blk-pm.c b/block/blk-pm.c index b85234d758f7b2..17bd020268d421 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -67,6 +67,10 @@ int blk_pre_runtime_suspend(struct request_queue *q) WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE); + spin_lock_irq(&q->queue_lock); + q->rpm_status = RPM_SUSPENDING; + spin_unlock_irq(&q->queue_lock); + /* * Increase the pm_only counter before checking whether any * non-PM blk_queue_enter() calls are in progress to avoid that any @@ -89,15 +93,14 @@ int blk_pre_runtime_suspend(struct request_queue *q) /* Switch q_usage_counter back to per-cpu mode. */ blk_mq_unfreeze_queue(q); - spin_lock_irq(&q->queue_lock); - if (ret < 0) + if (ret < 0) { + spin_lock_irq(&q->queue_lock); + q->rpm_status = RPM_ACTIVE; pm_runtime_mark_last_busy(q->dev); - else - q->rpm_status = RPM_SUSPENDING; - spin_unlock_irq(&q->queue_lock); + spin_unlock_irq(&q->queue_lock); - if (ret) blk_clear_pm_only(q); + } return ret; } diff --git a/block/blk-pm.h b/block/blk-pm.h index ea5507d23e7597..a2283cc9f716dc 100644 --- a/block/blk-pm.h +++ b/block/blk-pm.h @@ -6,11 +6,14 @@ #include #ifdef CONFIG_PM -static inline void blk_pm_request_resume(struct request_queue *q) +static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q) { - if (q->dev && (q->rpm_status == RPM_SUSPENDED || - q->rpm_status == RPM_SUSPENDING)) - pm_request_resume(q->dev); + if (!q->dev || !blk_queue_pm_only(q)) + return 1; /* Nothing to do */ + if (pm && q->rpm_status != RPM_SUSPENDED) + return 1; /* Request allowed */ + pm_request_resume(q->dev); + return 0; } static inline void blk_pm_mark_last_busy(struct request *rq) @@ -44,8 +47,9 @@ static inline void 
blk_pm_put_request(struct request *rq) --rq->q->nr_pending; } #else -static inline void blk_pm_request_resume(struct request_queue *q) +static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q) { + return 1; } static inline void blk_pm_mark_last_busy(struct request *rq) diff --git a/block/genhd.c b/block/genhd.c index 9387f050c248a7..ec6264e2ed671e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -256,14 +256,17 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; + get_device(part_to_dev(part)); + piter->part = part; if (!part_nr_sects_read(part) && !(piter->flags & DISK_PITER_INCL_EMPTY) && !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && - piter->idx == 0)) + piter->idx == 0)) { + put_device(part_to_dev(part)); + piter->part = NULL; continue; + } - get_device(part_to_dev(part)); - piter->part = part; piter->idx += inc; break; } diff --git a/crypto/Kconfig b/crypto/Kconfig index 094ef56ab7b42d..37de7d006858d7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -145,7 +145,7 @@ config CRYPTO_MANAGER_DISABLE_TESTS config CRYPTO_MANAGER_EXTRA_TESTS bool "Enable extra run-time crypto self tests" - depends on DEBUG_KERNEL && !CRYPTO_MANAGER_DISABLE_TESTS + depends on DEBUG_KERNEL && !CRYPTO_MANAGER_DISABLE_TESTS && CRYPTO_MANAGER help Enable extra run-time self tests of registered crypto algorithms, including randomized fuzz tests. 
diff --git a/crypto/af_alg.c b/crypto/af_alg.c index d11db80d24cd14..9acb9d2c4bcf93 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -147,7 +147,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY; struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); - struct sockaddr_alg *sa = (void *)uaddr; + struct sockaddr_alg_new *sa = (void *)uaddr; const struct af_alg_type *type; void *private; int err; @@ -155,7 +155,11 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (sock->state == SS_CONNECTED) return -EINVAL; - if (addr_len < sizeof(*sa)) + BUILD_BUG_ON(offsetof(struct sockaddr_alg_new, salg_name) != + offsetof(struct sockaddr_alg, salg_name)); + BUILD_BUG_ON(offsetof(struct sockaddr_alg, salg_name) != sizeof(*sa)); + + if (addr_len < sizeof(*sa) + 1) return -EINVAL; /* If caller uses non-allowed flag, return error. */ @@ -163,7 +167,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EINVAL; sa->salg_type[sizeof(sa->salg_type) - 1] = 0; - sa->salg_name[sizeof(sa->salg_name) + addr_len - sizeof(*sa) - 1] = 0; + sa->salg_name[addr_len - sizeof(*sa) - 1] = 0; type = alg_get_type(sa->salg_type); if (PTR_ERR(type) == -ENOENT) { diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c index 378b18b9bc342a..84a5d6af9609e9 100644 --- a/crypto/asymmetric_keys/asym_tpm.c +++ b/crypto/asymmetric_keys/asym_tpm.c @@ -354,7 +354,7 @@ static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf) memcpy(cur, e, sizeof(e)); cur += sizeof(e); /* Zero parameters to satisfy set_pub_key ABI. 
*/ - memset(cur, 0, SETKEY_PARAMS_SIZE); + memzero_explicit(cur, SETKEY_PARAMS_SIZE); return cur - buf; } diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 8892908ad58ce4..788a4ba1e2e747 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -356,7 +356,8 @@ int public_key_verify_signature(const struct public_key *pkey, if (ret) goto error_free_key; - if (strcmp(sig->pkey_algo, "sm2") == 0 && sig->data_size) { + if (sig->pkey_algo && strcmp(sig->pkey_algo, "sm2") == 0 && + sig->data_size) { ret = cert_sig_digest_update(sig, tfm); if (ret) goto error_free_key; diff --git a/crypto/ecdh.c b/crypto/ecdh.c index b0232d6ab4ce70..96f80c8f8e3048 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -39,7 +39,8 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, struct ecdh params; unsigned int ndigits; - if (crypto_ecdh_decode_key(buf, len, &params) < 0) + if (crypto_ecdh_decode_key(buf, len, &params) < 0 || + params.key_size > sizeof(ctx->private_key)) return -EINVAL; ndigits = ecdh_supported_curve(params.curve_id); @@ -53,12 +54,13 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, return ecc_gen_privkey(ctx->curve_id, ctx->ndigits, ctx->private_key); - if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits, - (const u64 *)params.key, params.key_size) < 0) - return -EINVAL; - memcpy(ctx->private_key, params.key, params.key_size); + if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits, + ctx->private_key, params.key_size) < 0) { + memzero_explicit(ctx->private_key, params.key_size); + return -EINVAL; + } return 0; } diff --git a/crypto/xor.c b/crypto/xor.c index eacbf4f939900f..8f899f898ec9f9 100644 --- a/crypto/xor.c +++ b/crypto/xor.c @@ -107,6 +107,8 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) preempt_enable(); // bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s] + if (!min) + min = 1; speed = (1000 * REPS * BENCH_SIZE) / (unsigned 
int)ktime_to_ns(min); tmpl->speed = speed; diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c index 780214b5ca16ea..ab6d61e80b1cba 100644 --- a/drivers/accessibility/speakup/speakup_dectlk.c +++ b/drivers/accessibility/speakup/speakup_dectlk.c @@ -37,7 +37,7 @@ static unsigned char get_index(struct spk_synth *synth); static int in_escape; static int is_flushing; -static spinlock_t flush_lock; +static DEFINE_SPINLOCK(flush_lock); static DECLARE_WAIT_QUEUE_HEAD(flush); static struct var_t vars[] = { diff --git a/drivers/acpi/acpi_pnp.c b/drivers/acpi/acpi_pnp.c index 4ed755a963aa5a..8f2dc176bb412b 100644 --- a/drivers/acpi/acpi_pnp.c +++ b/drivers/acpi/acpi_pnp.c @@ -319,6 +319,9 @@ static bool matching_id(const char *idstr, const char *list_id) { int i; + if (strlen(idstr) != strlen(list_id)) + return false; + if (memcmp(idstr, list_id, 3)) return false; diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 770d84071a3286..94f34109695c95 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1107,6 +1107,11 @@ static int nc_dma_get_range(struct device *dev, u64 *size) ncomp = (struct acpi_iort_named_component *)node->node_data; + if (!ncomp->memory_address_limit) { + pr_warn(FW_BUG "Named component missing memory address limit\n"); + return -EINVAL; + } + *size = ncomp->memory_address_limit >= 64 ? U64_MAX : 1ULL<<ncomp->memory_address_limit; @@ -1126,6 +1131,11 @@ static int rc_dma_get_range(struct device *dev, u64 *size) rc = (struct acpi_iort_root_complex *)node->node_data; + if (!rc->memory_address_limit) { + pr_warn(FW_BUG "Root complex missing memory address limit\n"); + return -EINVAL; + } + *size = rc->memory_address_limit >= 64 ? 
U64_MAX : 1ULL<<rc->memory_address_limit; @@ -1173,8 +1183,8 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) end = dmaaddr + size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->bus_dma_limit = end; - dev->coherent_dma_mask = mask; - *dev->dma_mask = mask; + dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); + *dev->dma_mask = min(*dev->dma_mask, mask); } *dma_addr = dmaaddr; diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 94d91c67aeaeb8..ef77dbcaf58f68 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -749,7 +749,7 @@ static void acpi_pm_notify_work_func(struct acpi_device_wakeup_context *context) static DEFINE_MUTEX(acpi_wakeup_lock); static int __acpi_device_wakeup_enable(struct acpi_device *adev, - u32 target_state, int max_count) + u32 target_state) { struct acpi_device_wakeup *wakeup = &adev->wakeup; acpi_status status; @@ -757,9 +757,10 @@ static int __acpi_device_wakeup_enable(struct acpi_device *adev, mutex_lock(&acpi_wakeup_lock); - if (wakeup->enable_count >= max_count) + if (wakeup->enable_count >= INT_MAX) { + acpi_handle_info(adev->handle, "Wakeup enable count out of bounds!\n"); goto out; - + } if (wakeup->enable_count > 0) goto inc; @@ -799,7 +800,7 @@ static int __acpi_device_wakeup_enable(struct acpi_device *adev, */ static int acpi_device_wakeup_enable(struct acpi_device *adev, u32 target_state) { - return __acpi_device_wakeup_enable(adev, target_state, 1); + return __acpi_device_wakeup_enable(adev, target_state); } /** @@ -829,8 +830,12 @@ static void acpi_device_wakeup_disable(struct acpi_device *adev) mutex_unlock(&acpi_wakeup_lock); } -static int __acpi_pm_set_device_wakeup(struct device *dev, bool enable, - int max_count) +/** + * acpi_pm_set_device_wakeup - Enable/disable remote wakeup for given device. + * @dev: Device to enable/disable to generate wakeup events. + * @enable: Whether to enable or disable the wakeup functionality. 
+ */ +int acpi_pm_set_device_wakeup(struct device *dev, bool enable) { struct acpi_device *adev; int error; @@ -850,36 +855,14 @@ static int __acpi_pm_set_device_wakeup(struct device *dev, bool enable, return 0; } - error = __acpi_device_wakeup_enable(adev, acpi_target_system_state(), - max_count); + error = __acpi_device_wakeup_enable(adev, acpi_target_system_state()); if (!error) dev_dbg(dev, "Wakeup enabled by ACPI\n"); return error; } - -/** - * acpi_pm_set_device_wakeup - Enable/disable remote wakeup for given device. - * @dev: Device to enable/disable to generate wakeup events. - * @enable: Whether to enable or disable the wakeup functionality. - */ -int acpi_pm_set_device_wakeup(struct device *dev, bool enable) -{ - return __acpi_pm_set_device_wakeup(dev, enable, 1); -} EXPORT_SYMBOL_GPL(acpi_pm_set_device_wakeup); -/** - * acpi_pm_set_bridge_wakeup - Enable/disable remote wakeup for given bridge. - * @dev: Bridge device to enable/disable to generate wakeup events. - * @enable: Whether to enable or disable the wakeup functionality. - */ -int acpi_pm_set_bridge_wakeup(struct device *dev, bool enable) -{ - return __acpi_pm_set_device_wakeup(dev, enable, INT_MAX); -} -EXPORT_SYMBOL_GPL(acpi_pm_set_bridge_wakeup); - /** * acpi_dev_pm_low_power - Put ACPI device into a low-power state. * @dev: Device to put into a low-power state. 
diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 96869f1538b93a..bfca116482b8bb 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -251,20 +251,12 @@ int __acpi_device_uevent_modalias(struct acpi_device *adev, if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; - len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], - sizeof(env->buf) - env->buflen); - if (len < 0) - return len; - - env->buflen += len; - if (!adev->data.of_compatible) - return 0; - - if (len > 0 && add_uevent_var(env, "MODALIAS=")) - return -ENOMEM; - - len = create_of_modalias(adev, &env->buf[env->buflen - 1], - sizeof(env->buf) - env->buflen); + if (adev->data.of_compatible) + len = create_of_modalias(adev, &env->buf[env->buflen - 1], + sizeof(env->buf) - env->buflen); + else + len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], + sizeof(env->buf) - env->buflen); if (len < 0) return len; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index e3638bafb94110..aee023ad023754 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -97,7 +97,7 @@ void acpi_scan_table_handler(u32 event, void *table, void *context); extern struct list_head acpi_bus_id_list; struct acpi_device_bus_id { - char bus_id[15]; + const char *bus_id; unsigned int instance_no; struct list_head node; }; diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 442608220b5c80..4c97b0f44fce22 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -478,8 +479,11 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, cmd_mask = nd_desc->cmd_mask; if (cmd == ND_CMD_CALL && call_pkg->nd_family) { family = call_pkg->nd_family; - if (!test_bit(family, &nd_desc->bus_family_mask)) + if (family > NVDIMM_BUS_FAMILY_MAX || + !test_bit(family, &nd_desc->bus_family_mask)) return -EINVAL; + family = 
array_index_nospec(family, + NVDIMM_BUS_FAMILY_MAX + 1); dsm_mask = acpi_desc->family_dsm_mask[family]; guid = to_nfit_bus_uuid(family); } else { diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index ad04824ca3baa0..f2f5f1dc7c61d6 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -541,7 +541,7 @@ static acpi_status acpi_dev_process_resource(struct acpi_resource *ares, ret = c->preproc(ares, c->preproc_data); if (ret < 0) { c->error = ret; - return AE_CTRL_TERMINATE; + return AE_ABORT_METHOD; } else if (ret > 0) { return AE_OK; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index bc6a79e3322092..dca5cc423cd41a 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -486,6 +486,7 @@ static void acpi_device_del(struct acpi_device *device) acpi_device_bus_id->instance_no--; else { list_del(&acpi_device_bus_id->node); + kfree_const(acpi_device_bus_id->bus_id); kfree(acpi_device_bus_id); } break; @@ -585,6 +586,8 @@ static int acpi_get_device_data(acpi_handle handle, struct acpi_device **device, if (!device) return -EINVAL; + *device = NULL; + status = acpi_get_data_full(handle, acpi_scan_drop_device, (void **)device, callback); if (ACPI_FAILURE(status) || !*device) { @@ -674,7 +677,14 @@ int acpi_device_add(struct acpi_device *device, } if (!found) { acpi_device_bus_id = new_bus_id; - strcpy(acpi_device_bus_id->bus_id, acpi_device_hid(device)); + acpi_device_bus_id->bus_id = + kstrdup_const(acpi_device_hid(device), GFP_KERNEL); + if (!acpi_device_bus_id->bus_id) { + pr_err(PREFIX "Memory allocation error for bus id\n"); + result = -ENOMEM; + goto err_free_new_bus_id; + } + acpi_device_bus_id->instance_no = 0; list_add_tail(&acpi_device_bus_id->node, &acpi_bus_id_list); } @@ -709,6 +719,11 @@ int acpi_device_add(struct acpi_device *device, if (device->parent) list_del(&device->node); list_del(&device->wakeup_list); + + err_free_new_bus_id: + if (!found) + kfree(new_bus_id); + mutex_unlock(&acpi_device_lock); err_detach: 
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 12c0ece746f04a..859b1de31ddc0c 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -174,6 +174,8 @@ struct acpi_thermal { struct thermal_zone_device *thermal_zone; int kelvin_offset; /* in millidegrees */ struct work_struct thermal_check_work; + struct mutex thermal_check_lock; + refcount_t thermal_check_count; }; /* -------------------------------------------------------------------------- @@ -495,14 +497,6 @@ static int acpi_thermal_get_trip_points(struct acpi_thermal *tz) return 0; } -static void acpi_thermal_check(void *data) -{ - struct acpi_thermal *tz = data; - - thermal_zone_device_update(tz->thermal_zone, - THERMAL_EVENT_UNSPECIFIED); -} - /* sys I/F for generic thermal sysfs support */ static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp) @@ -900,6 +894,12 @@ static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz) Driver Interface -------------------------------------------------------------------------- */ +static void acpi_queue_thermal_check(struct acpi_thermal *tz) +{ + if (!work_pending(&tz->thermal_check_work)) + queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work); +} + static void acpi_thermal_notify(struct acpi_device *device, u32 event) { struct acpi_thermal *tz = acpi_driver_data(device); @@ -910,17 +910,17 @@ static void acpi_thermal_notify(struct acpi_device *device, u32 event) switch (event) { case ACPI_THERMAL_NOTIFY_TEMPERATURE: - acpi_thermal_check(tz); + acpi_queue_thermal_check(tz); break; case ACPI_THERMAL_NOTIFY_THRESHOLDS: acpi_thermal_trips_update(tz, ACPI_TRIPS_REFRESH_THRESHOLDS); - acpi_thermal_check(tz); + acpi_queue_thermal_check(tz); acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; case ACPI_THERMAL_NOTIFY_DEVICES: acpi_thermal_trips_update(tz, ACPI_TRIPS_REFRESH_DEVICES); - acpi_thermal_check(tz); + acpi_queue_thermal_check(tz); 
acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; @@ -1020,7 +1020,25 @@ static void acpi_thermal_check_fn(struct work_struct *work) { struct acpi_thermal *tz = container_of(work, struct acpi_thermal, thermal_check_work); - acpi_thermal_check(tz); + + /* + * In general, it is not sufficient to check the pending bit, because + * subsequent instances of this function may be queued after one of them + * has started running (e.g. if _TMP sleeps). Avoid bailing out if just + * one of them is running, though, because it may have done the actual + * check some time ago, so allow at least one of them to block on the + * mutex while another one is running the update. + */ + if (!refcount_dec_not_one(&tz->thermal_check_count)) + return; + + mutex_lock(&tz->thermal_check_lock); + + thermal_zone_device_update(tz->thermal_zone, THERMAL_EVENT_UNSPECIFIED); + + refcount_inc(&tz->thermal_check_count); + + mutex_unlock(&tz->thermal_check_lock); } static int acpi_thermal_add(struct acpi_device *device) @@ -1052,6 +1070,8 @@ static int acpi_thermal_add(struct acpi_device *device) if (result) goto free_memory; + refcount_set(&tz->thermal_check_count, 3); + mutex_init(&tz->thermal_check_lock); INIT_WORK(&tz->thermal_check_work, acpi_thermal_check_fn); pr_info(PREFIX "%s [%s] (%ld C)\n", acpi_device_name(device), @@ -1117,7 +1137,7 @@ static int acpi_thermal_resume(struct device *dev) tz->state.active |= tz->trips.active[i].flags.enabled; } - queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work); + acpi_queue_thermal_check(tz); return AE_OK; } diff --git a/drivers/android/binder.c b/drivers/android/binder.c index b5117576792bc6..2a3952925855de 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3146,6 +3146,7 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->debug_id = t->debug_id; t->buffer->transaction = t; t->buffer->target_node = target_node; + t->buffer->clear_on_free = !!(t->flags & 
TF_CLEAR_BUF); trace_binder_transaction_alloc_buf(t->buffer); if (binder_alloc_copy_user_to_buffer( diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 2f846b7ae8b823..7caf74ad24053a 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -696,6 +696,8 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, binder_insert_free_buffer(alloc, buffer); } +static void binder_alloc_clear_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer); /** * binder_alloc_free_buf() - free a binder buffer * @alloc: binder_alloc for this proc @@ -706,6 +708,18 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, void binder_alloc_free_buf(struct binder_alloc *alloc, struct binder_buffer *buffer) { + /* + * We could eliminate the call to binder_alloc_clear_buf() + * from binder_alloc_deferred_release() by moving this to + * binder_alloc_free_buf_locked(). However, that could + * increase contention for the alloc mutex if clear_on_free + * is used frequently for large buffers. The mutex is not + * needed for correctness here. 
+ */ + if (buffer->clear_on_free) { + binder_alloc_clear_buf(alloc, buffer); + buffer->clear_on_free = false; + } mutex_lock(&alloc->mutex); binder_free_buf_locked(alloc, buffer); mutex_unlock(&alloc->mutex); @@ -802,6 +816,10 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) /* Transaction should already have been freed */ BUG_ON(buffer->transaction); + if (buffer->clear_on_free) { + binder_alloc_clear_buf(alloc, buffer); + buffer->clear_on_free = false; + } binder_free_buf_locked(alloc, buffer); buffers++; } @@ -1135,6 +1153,36 @@ static struct page *binder_alloc_get_page(struct binder_alloc *alloc, return lru_page->page_ptr; } +/** + * binder_alloc_clear_buf() - zero out buffer + * @alloc: binder_alloc for this proc + * @buffer: binder buffer to be cleared + * + * memset the given buffer to 0 + */ +static void binder_alloc_clear_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + size_t bytes = binder_alloc_buffer_size(alloc, buffer); + binder_size_t buffer_offset = 0; + + while (bytes) { + unsigned long size; + struct page *page; + pgoff_t pgoff; + void *kptr; + + page = binder_alloc_get_page(alloc, buffer, + buffer_offset, &pgoff); + size = min_t(size_t, bytes, PAGE_SIZE - pgoff); + kptr = kmap(page) + pgoff; + memset(kptr, 0, size); + kunmap(page); + bytes -= size; + buffer_offset += size; + } +} + /** * binder_alloc_copy_user_to_buffer() - copy src user to tgt user * @alloc: binder_alloc for this proc diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 55d8b4106766a6..6e8e001381af4b 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -23,6 +23,7 @@ struct binder_transaction; * @entry: entry alloc->buffers * @rb_node: node for allocated_buffers/free_buffers rb trees * @free: %true if buffer is free + * @clear_on_free: %true if buffer must be zeroed after use * @allow_user_free: %true if user is allowed to free buffer * @async_transaction: %true if buffer is in use 
for an async txn * @debug_id: unique ID for debugging @@ -41,9 +42,10 @@ struct binder_buffer { struct rb_node rb_node; /* free entry by size or allocated entry */ /* by address */ unsigned free:1; + unsigned clear_on_free:1; unsigned allow_user_free:1; unsigned async_transaction:1; - unsigned debug_id:29; + unsigned debug_id:28; struct binder_transaction *transaction; diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 65a3886f68c9e3..5f0472c18bcbd7 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3607,7 +3607,7 @@ static int idt77252_init_one(struct pci_dev *pcidev, if ((err = dma_set_mask_and_coherent(&pcidev->dev, DMA_BIT_MASK(32)))) { printk("idt77252: can't enable DMA for PCI device at %s\n", pci_name(pcidev)); - return err; + goto err_out_disable_pdev; } card = kzalloc(sizeof(struct idt77252_dev), GFP_KERNEL); diff --git a/drivers/base/core.c b/drivers/base/core.c index d661ada1518fb4..96f73aaf71da3a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -115,6 +115,16 @@ int device_links_read_lock_held(void) #endif #endif /* !CONFIG_SRCU */ +static bool device_is_ancestor(struct device *dev, struct device *target) +{ + while (target->parent) { + target = target->parent; + if (dev == target) + return true; + } + return false; +} + /** * device_is_dependent - Check if one device depends on another one * @dev: Device to check dependencies for. @@ -128,7 +138,12 @@ int device_is_dependent(struct device *dev, void *target) struct device_link *link; int ret; - if (dev == target) + /* + * The "ancestors" check is needed to catch the case when the target + * device has not been completely initialized yet and it is still + * missing from the list of children of its parent device. 
+ */ + if (dev == target || device_is_ancestor(dev, target)) return 1; ret = device_for_each_child(dev, target, device_is_dependent); @@ -363,7 +378,9 @@ static int devlink_add_symlinks(struct device *dev, struct device *con = link->consumer; char *buf; - len = max(strlen(dev_name(sup)), strlen(dev_name(con))); + len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), + strlen(dev_bus_name(con)) + strlen(dev_name(con))); + len += strlen(":"); len += strlen("supplier:") + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) @@ -377,12 +394,12 @@ static int devlink_add_symlinks(struct device *dev, if (ret) goto err_con; - snprintf(buf, len, "consumer:%s", dev_name(con)); + snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf); if (ret) goto err_con_dev; - snprintf(buf, len, "supplier:%s", dev_name(sup)); + snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf); if (ret) goto err_sup_dev; @@ -390,7 +407,7 @@ static int devlink_add_symlinks(struct device *dev, goto out; err_sup_dev: - snprintf(buf, len, "consumer:%s", dev_name(con)); + snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); err_con_dev: sysfs_remove_link(&link->link_dev.kobj, "consumer"); @@ -413,7 +430,9 @@ static void devlink_remove_symlinks(struct device *dev, sysfs_remove_link(&link->link_dev.kobj, "consumer"); sysfs_remove_link(&link->link_dev.kobj, "supplier"); - len = max(strlen(dev_name(sup)), strlen(dev_name(con))); + len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), + strlen(dev_bus_name(con)) + strlen(dev_name(con))); + len += strlen(":"); len += strlen("supplier:") + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) { @@ -421,9 +440,9 @@ static void devlink_remove_symlinks(struct device *dev, return; } - snprintf(buf, len, "supplier:%s", dev_name(sup)); + snprintf(buf, len, 
"supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); sysfs_remove_link(&con->kobj, buf); - snprintf(buf, len, "consumer:%s", dev_name(con)); + snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); kfree(buf); } @@ -633,8 +652,9 @@ struct device_link *device_link_add(struct device *consumer, link->link_dev.class = &devlink_class; device_set_pm_not_required(&link->link_dev); - dev_set_name(&link->link_dev, "%s--%s", - dev_name(supplier), dev_name(consumer)); + dev_set_name(&link->link_dev, "%s:%s--%s:%s", + dev_bus_name(supplier), dev_name(supplier), + dev_bus_name(consumer), dev_name(consumer)); if (device_register(&link->link_dev)) { put_device(consumer); put_device(supplier); @@ -1386,7 +1406,7 @@ static void device_links_purge(struct device *dev) return; mutex_lock(&wfs_lock); - list_del(&dev->links.needs_suppliers); + list_del_init(&dev->links.needs_suppliers); mutex_unlock(&wfs_lock); /* @@ -1652,9 +1672,7 @@ const char *dev_driver_string(const struct device *dev) * never change once they are set, so they don't need special care. */ drv = READ_ONCE(dev->driver); - return drv ? drv->name : - (dev->bus ? dev->bus->name : - (dev->class ? dev->class->name : "")); + return drv ? 
drv->name : dev_bus_name(dev); } EXPORT_SYMBOL(dev_driver_string); @@ -4278,7 +4296,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; if (!(parent && fn == parent->fwnode)) - fn->secondary = ERR_PTR(-ENODEV); + fn->secondary = NULL; } else { dev->fwnode = NULL; } diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 148e81969e0462..3c94ebc8d4bb0d 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -612,6 +612,8 @@ static int really_probe(struct device *dev, struct device_driver *drv) else if (drv->remove) drv->remove(dev); probe_failed: + kfree(dev->dma_range_map); + dev->dma_range_map = NULL; if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 8dfac7f3ed7aa4..ff2ee87987c7e6 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -582,8 +582,12 @@ void regmap_debugfs_init(struct regmap *map) devname = dev_name(map->dev); if (name) { - map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s", + if (!map->debugfs_name) { + map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s", devname, name); + if (!map->debugfs_name) + return; + } name = map->debugfs_name; } else { name = devname; @@ -591,9 +595,10 @@ void regmap_debugfs_init(struct regmap *map) if (!strcmp(name, "dummy")) { kfree(map->debugfs_name); - map->debugfs_name = kasprintf(GFP_KERNEL, "dummy%d", dummy_index); + if (!map->debugfs_name) + return; name = map->debugfs_name; dummy_index++; } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ecceaaa1a66ffa..f40ebe9f504746 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -451,6 +451,7 @@ config BLK_DEV_RBD config BLK_DEV_RSXX tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver" depends on PCI + select CRC32 help Device driver for IBM's high speed PCIe 
SSD storage device: Flash Adapter 900GB Full Height. diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index aaae9220f3a008..bd5c04fabdab65 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1029,6 +1029,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, if (!sock) return err; + /* + * We need to make sure we don't get any errant requests while we're + * reallocating the ->socks array. + */ + blk_mq_freeze_queue(nbd->disk->queue); + if (!netlink && !nbd->task_setup && !test_bit(NBD_RT_BOUND, &config->runtime_flags)) nbd->task_setup = current; @@ -1067,10 +1073,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, nsock->cookie = 0; socks[config->num_connections++] = nsock; atomic_inc(&config->live_connections); + blk_mq_unfreeze_queue(nbd->disk->queue); return 0; put_socket: + blk_mq_unfreeze_queue(nbd->disk->queue); sockfd_put(sock); return err; } diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index beb34b4f76b0e6..172f720b8d637e 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -6,8 +6,7 @@ #define CREATE_TRACE_POINTS #include "null_blk_trace.h" -/* zone_size in MBs to sectors. 
*/ -#define ZONE_SIZE_SHIFT 11 +#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT) static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) { @@ -16,7 +15,7 @@ static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) { - sector_t dev_size = (sector_t)dev->size * 1024 * 1024; + sector_t dev_capacity_sects, zone_capacity_sects; sector_t sector = 0; unsigned int i; @@ -38,9 +37,13 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) return -EINVAL; } - dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT; - dev->nr_zones = dev_size >> - (SECTOR_SHIFT + ilog2(dev->zone_size_sects)); + zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity); + dev_capacity_sects = MB_TO_SECTS(dev->size); + dev->zone_size_sects = MB_TO_SECTS(dev->zone_size); + dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects); + if (dev_capacity_sects & (dev->zone_size_sects - 1)) + dev->nr_zones++; + dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone), GFP_KERNEL | __GFP_ZERO); if (!dev->zones) @@ -101,8 +104,12 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) struct blk_zone *zone = &dev->zones[i]; zone->start = zone->wp = sector; - zone->len = dev->zone_size_sects; - zone->capacity = dev->zone_capacity << ZONE_SIZE_SHIFT; + if (zone->start + dev->zone_size_sects > dev_capacity_sects) + zone->len = dev_capacity_sects - zone->start; + else + zone->len = dev->zone_size_sects; + zone->capacity = + min_t(sector_t, zone->len, zone_capacity_sects); zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; zone->cond = BLK_ZONE_COND_EMPTY; @@ -332,8 +339,11 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, trace_nullb_zone_op(cmd, zno, zone->cond); - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { + if (append) + return 
BLK_STS_IOERR; return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); + } null_lock_zone(dev, zno); diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index 4f4474eecadb76..d9dd138ca9c648 100644 --- a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -433,8 +433,9 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) * i.e. rnbd_clt_unmap_dev_store() leading to a sysfs warning because * of sysfs link already was removed already. */ - if (strlen(dev->blk_symlink_name) && try_module_get(THIS_MODULE)) { + if (dev->blk_symlink_name && try_module_get(THIS_MODULE)) { sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name); + kfree(dev->blk_symlink_name); module_put(THIS_MODULE); } } @@ -487,10 +488,17 @@ static int rnbd_clt_get_path_name(struct rnbd_clt_dev *dev, char *buf, static int rnbd_clt_add_dev_symlink(struct rnbd_clt_dev *dev) { struct kobject *gd_kobj = &disk_to_dev(dev->gd)->kobj; - int ret; + int ret, len; + + len = strlen(dev->pathname) + strlen(dev->sess->sessname) + 2; + dev->blk_symlink_name = kzalloc(len, GFP_KERNEL); + if (!dev->blk_symlink_name) { + rnbd_clt_err(dev, "Failed to allocate memory for blk_symlink_name\n"); + return -ENOMEM; + } ret = rnbd_clt_get_path_name(dev, dev->blk_symlink_name, - sizeof(dev->blk_symlink_name)); + len); if (ret) { rnbd_clt_err(dev, "Failed to get /sys/block symlink path, err: %d\n", ret); @@ -508,7 +516,8 @@ static int rnbd_clt_add_dev_symlink(struct rnbd_clt_dev *dev) return 0; out_err: - dev->blk_symlink_name[0] = '\0'; + kfree(dev->blk_symlink_name); + dev->blk_symlink_name = NULL ; return ret; } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 8b2411ccbda97c..ba334fe7626db6 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -59,6 +59,7 @@ static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev) ida_simple_remove(&index_ida, dev->clt_device_id); mutex_unlock(&ida_lock); 
kfree(dev->hw_queues); + kfree(dev->pathname); rnbd_clt_put_sess(dev->sess); mutex_destroy(&dev->lock); kfree(dev); @@ -1381,10 +1382,16 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, pathname, sess->sessname, ret); goto out_queues; } + + dev->pathname = kstrdup(pathname, GFP_KERNEL); + if (!dev->pathname) { + ret = -ENOMEM; + goto out_queues; + } + dev->clt_device_id = ret; dev->sess = sess; dev->access_mode = access_mode; - strlcpy(dev->pathname, pathname, sizeof(dev->pathname)); mutex_init(&dev->lock); refcount_set(&dev->refcount, 1); dev->dev_state = DEV_STATE_INIT; @@ -1413,8 +1420,8 @@ static bool __exists_dev(const char *pathname) list_for_each_entry(sess, &sess_list, list) { mutex_lock(&sess->lock); list_for_each_entry(dev, &sess->devs_list, list) { - if (!strncmp(dev->pathname, pathname, - sizeof(dev->pathname))) { + if (strlen(dev->pathname) == strlen(pathname) && + !strcmp(dev->pathname, pathname)) { found = true; break; } @@ -1664,7 +1671,8 @@ static void rnbd_destroy_sessions(void) */ list_for_each_entry_safe(sess, sn, &sess_list, list) { - WARN_ON(!rnbd_clt_get_sess(sess)); + if (!rnbd_clt_get_sess(sess)) + continue; close_rtrs(sess); list_for_each_entry_safe(dev, tn, &sess->devs_list, list) { /* diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h index ed33654aa4868f..b193d59040503b 100644 --- a/drivers/block/rnbd/rnbd-clt.h +++ b/drivers/block/rnbd/rnbd-clt.h @@ -108,7 +108,7 @@ struct rnbd_clt_dev { u32 clt_device_id; struct mutex lock; enum rnbd_clt_dev_state dev_state; - char pathname[NAME_MAX]; + char *pathname; enum rnbd_access_mode access_mode; bool read_only; bool rotational; @@ -126,7 +126,7 @@ struct rnbd_clt_dev { struct list_head list; struct gendisk *gd; struct kobject kobj; - char blk_symlink_name[NAME_MAX]; + char *blk_symlink_name; refcount_t refcount; struct work_struct unmap_on_rmmod_work; }; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 
76912c584a76dd..9860d4842f36c7 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -274,6 +274,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) if (ring->xenblkd) { kthread_stop(ring->xenblkd); + ring->xenblkd = NULL; wake_up(&ring->shutdown_wq); } @@ -675,7 +676,8 @@ static int xen_blkbk_probe(struct xenbus_device *dev, /* setup back pointer */ be->blkif->be = be; - err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, + err = xenbus_watch_pathfmt(dev, &be->backend_watch, NULL, + backend_changed, "%s/%s", dev->nodename, "physical-device"); if (err) goto fail; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 48629d3433b4c3..10078a74356441 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -945,7 +945,8 @@ static void blkif_set_queue_limits(struct blkfront_info *info) if (info->feature_discard) { blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq); blk_queue_max_discard_sectors(rq, get_capacity(gd)); - rq->limits.discard_granularity = info->discard_granularity; + rq->limits.discard_granularity = info->discard_granularity ?: + info->physical_sector_size; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq); @@ -2179,19 +2180,12 @@ static void blkfront_closing(struct blkfront_info *info) static void blkfront_setup_discard(struct blkfront_info *info) { - int err; - unsigned int discard_granularity; - unsigned int discard_alignment; - info->feature_discard = 1; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "discard-granularity", "%u", &discard_granularity, - "discard-alignment", "%u", &discard_alignment, - NULL); - if (!err) { - info->discard_granularity = discard_granularity; - info->discard_alignment = discard_alignment; - } + info->discard_granularity = xenbus_read_unsigned(info->xbdev->otherend, + "discard-granularity", + 0); + info->discard_alignment = 
xenbus_read_unsigned(info->xbdev->otherend, + "discard-alignment", 0); info->feature_secdiscard = !!xenbus_read_unsigned(info->xbdev->otherend, "discard-secure", 0); diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index ba45c59bd9f36c..5f9f027956317e 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -704,7 +704,7 @@ static int mtk_setup_firmware(struct hci_dev *hdev, const char *fwname) err = mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to power on data RAM (%d)", err); - return err; + goto free_fw; } fw_ptr = fw->data; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 1005b6e8ff7433..80468745d5c5e8 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1763,6 +1763,8 @@ static int btusb_setup_bcm92035(struct hci_dev *hdev) static int btusb_setup_csr(struct hci_dev *hdev) { + struct btusb_data *data = hci_get_drvdata(hdev); + u16 bcdDevice = le16_to_cpu(data->udev->descriptor.bcdDevice); struct hci_rp_read_local_version *rp; struct sk_buff *skb; bool is_fake = false; @@ -1832,6 +1834,12 @@ static int btusb_setup_csr(struct hci_dev *hdev) le16_to_cpu(rp->hci_ver) > BLUETOOTH_VER_4_0) is_fake = true; + /* Other clones which beat all the above checks */ + else if (bcdDevice == 0x0134 && + le16_to_cpu(rp->lmp_subver) == 0x0c5c && + le16_to_cpu(rp->hci_ver) == BLUETOOTH_VER_2_0) + is_fake = true; + if (is_fake) { bt_dev_warn(hdev, "CSR: Unbranded CSR clone detected; adding workarounds..."); @@ -3067,7 +3075,7 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname) err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to power on data RAM (%d)", err); - return err; + goto err_release_fw; } fw_ptr = fw->data; diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index 981d96cc769596..78d635f1d15675 100644 --- a/drivers/bluetooth/hci_h5.c +++ 
b/drivers/bluetooth/hci_h5.c @@ -245,6 +245,9 @@ static int h5_close(struct hci_uart *hu) skb_queue_purge(&h5->rel); skb_queue_purge(&h5->unrel); + kfree_skb(h5->rx_skb); + h5->rx_skb = NULL; + if (h5->vnd && h5->vnd->close) h5->vnd->close(h5); diff --git a/drivers/bus/fsl-mc/fsl-mc-allocator.c b/drivers/bus/fsl-mc/fsl-mc-allocator.c index e71a6f52ea0cfd..2d7c764bb7dcfc 100644 --- a/drivers/bus/fsl-mc/fsl-mc-allocator.c +++ b/drivers/bus/fsl-mc/fsl-mc-allocator.c @@ -292,8 +292,10 @@ int __must_check fsl_mc_object_allocate(struct fsl_mc_device *mc_dev, goto error; mc_adev = resource->data; - if (!mc_adev) + if (!mc_adev) { + error = -EINVAL; goto error; + } mc_adev->consumer_link = device_link_add(&mc_dev->dev, &mc_adev->dev, diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 76a6ee505d33de..806766b1b45f66 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -967,8 +967,11 @@ static int fsl_mc_bus_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mc); plat_res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (plat_res) + if (plat_res) { mc->fsl_mc_regs = devm_ioremap_resource(&pdev->dev, plat_res); + if (IS_ERR(mc->fsl_mc_regs)) + return PTR_ERR(mc->fsl_mc_regs); + } if (mc->fsl_mc_regs && IS_ENABLED(CONFIG_ACPI) && !dev_of_node(&pdev->dev)) { diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index 0ffdebde826576..8cefa359fccd81 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -610,7 +610,7 @@ static int parse_ev_cfg(struct mhi_controller *mhi_cntrl, { struct mhi_event *mhi_event; const struct mhi_event_config *event_cfg; - struct device *dev = &mhi_cntrl->mhi_dev->dev; + struct device *dev = mhi_cntrl->cntrl_dev; int i, num; num = config->num_events; @@ -692,7 +692,7 @@ static int parse_ch_cfg(struct mhi_controller *mhi_cntrl, const struct mhi_controller_config *config) { const struct mhi_channel_config *ch_cfg; - struct device 
*dev = &mhi_cntrl->mhi_dev->dev; + struct device *dev = mhi_cntrl->cntrl_dev; int i; u32 chan; @@ -1276,10 +1276,8 @@ static int mhi_driver_remove(struct device *dev) mutex_unlock(&mhi_chan->mutex); } - read_lock_bh(&mhi_cntrl->pm_lock); while (mhi_dev->dev_wake) mhi_device_put(mhi_dev); - read_unlock_bh(&mhi_cntrl->pm_lock); return 0; } diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c index 9f7ed1fcd42859..626dedd110cbc2 100644 --- a/drivers/bus/mips_cdmm.c +++ b/drivers/bus/mips_cdmm.c @@ -559,10 +559,8 @@ static void mips_cdmm_bus_discover(struct mips_cdmm_bus *bus) dev_set_name(&dev->dev, "cdmm%u-%u", cpu, id); ++id; ret = device_register(&dev->dev); - if (ret) { + if (ret) put_device(&dev->dev); - kfree(dev); - } } } diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index e92c4d9469d822..5952210526aaac 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -540,15 +540,15 @@ endif # HW_RANDOM config UML_RANDOM depends on UML - tristate "Hardware random number generator" + select HW_RANDOM + tristate "UML Random Number Generator support" help This option enables UML's "hardware" random number generator. It attaches itself to the host's /dev/random, supplying as much entropy as the host has, rather than the small amount the UML gets from its - own drivers. It registers itself as a standard hardware random number - generator, major 10, minor 183, and the canonical device name is - /dev/hwrng. - The way to make use of this is to install the rng-tools package - (check your distro, or download from - http://sourceforge.net/projects/gkernel/). rngd periodically reads - /dev/hwrng and injects the entropy into /dev/random. + own drivers. It registers itself as a rng-core driver thus providing + a device which is usually called /dev/hwrng. This hardware random + number generator does feed into the kernel's random number generator + entropy pool. + + If unsure, say Y. 
diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index 3c4c9560359542..c8cbec5308f028 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -174,7 +174,6 @@ static void __init sam9x60_pmc_setup(struct device_node *np) struct regmap *regmap; struct clk_hw *hw; int i; - bool bypass; i = of_property_match_string(np, "clock-names", "td_slck"); if (i < 0) @@ -209,10 +208,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) if (IS_ERR(hw)) goto err_free; - bypass = of_property_read_bool(np, "atmel,osc-bypass"); - - hw = at91_clk_register_main_osc(regmap, "main_osc", mainxtal_name, - bypass); + hw = at91_clk_register_main_osc(regmap, "main_osc", mainxtal_name, 0); if (IS_ERR(hw)) goto err_free; main_osc_hw = hw; diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index 0db2ab3eca147a..a092a940baa40b 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -838,7 +838,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np) sama7g5_pmc = pmc_data_allocate(PMC_I2S1_MUX + 1, nck(sama7g5_systemck), nck(sama7g5_periphck), - nck(sama7g5_gck)); + nck(sama7g5_gck), 8); if (!sama7g5_pmc) return; @@ -980,6 +980,8 @@ static void __init sama7g5_pmc_setup(struct device_node *np) sama7g5_prog_mux_table); if (IS_ERR(hw)) goto err_free; + + sama7g5_pmc->pchws[i] = hw; } for (i = 0; i < ARRAY_SIZE(sama7g5_systemck); i++) { @@ -1052,7 +1054,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np) kfree(alloc_mem); } - pmc_data_free(sama7g5_pmc); + kfree(sama7g5_pmc); } /* Some clks are used for a clocksource */ diff --git a/drivers/clk/bcm/clk-bcm2711-dvp.c b/drivers/clk/bcm/clk-bcm2711-dvp.c index 8333e20dc9d22d..69e2f85f7029df 100644 --- a/drivers/clk/bcm/clk-bcm2711-dvp.c +++ b/drivers/clk/bcm/clk-bcm2711-dvp.c @@ -108,6 +108,7 @@ static const struct of_device_id clk_dvp_dt_ids[] = { { .compatible = "brcm,brcm2711-dvp", }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, 
clk_dvp_dt_ids); static struct platform_driver clk_dvp_driver = { .probe = clk_dvp_probe, diff --git a/drivers/clk/clk-fsl-sai.c b/drivers/clk/clk-fsl-sai.c index 0221180a4dd734..1e81c8d8a6fd3e 100644 --- a/drivers/clk/clk-fsl-sai.c +++ b/drivers/clk/clk-fsl-sai.c @@ -68,9 +68,20 @@ static int fsl_sai_clk_probe(struct platform_device *pdev) if (IS_ERR(hw)) return PTR_ERR(hw); + platform_set_drvdata(pdev, hw); + return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw); } +static int fsl_sai_clk_remove(struct platform_device *pdev) +{ + struct clk_hw *hw = platform_get_drvdata(pdev); + + clk_hw_unregister_composite(hw); + + return 0; +} + static const struct of_device_id of_fsl_sai_clk_ids[] = { { .compatible = "fsl,vf610-sai-clock" }, { } @@ -79,6 +90,7 @@ MODULE_DEVICE_TABLE(of, of_fsl_sai_clk_ids); static struct platform_driver fsl_sai_clk_driver = { .probe = fsl_sai_clk_probe, + .remove = fsl_sai_clk_remove, .driver = { .name = "fsl-sai-clk", .of_match_table = of_fsl_sai_clk_ids, diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c index aa21371f9104c9..a3e883a9f40671 100644 --- a/drivers/clk/clk-s2mps11.c +++ b/drivers/clk/clk-s2mps11.c @@ -195,6 +195,7 @@ static int s2mps11_clk_probe(struct platform_device *pdev) return ret; err_reg: + of_node_put(s2mps11_clks[0].clk_np); while (--i >= 0) clkdev_drop(s2mps11_clks[i].lookup); diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c index c90460e7ef2153..43db67337bc068 100644 --- a/drivers/clk/clk-versaclock5.c +++ b/drivers/clk/clk-versaclock5.c @@ -739,8 +739,8 @@ static int vc5_update_power(struct device_node *np_output, { u32 value; - if (!of_property_read_u32(np_output, - "idt,voltage-microvolts", &value)) { + if (!of_property_read_u32(np_output, "idt,voltage-microvolt", + &value)) { clk_out->clk_output_cfg0_mask |= VC5_CLK_OUTPUT_CFG0_PWR_MASK; switch (value) { case 1800000: diff --git a/drivers/clk/imx/Kconfig b/drivers/clk/imx/Kconfig index 
3061896503f300..47d9ec3abd2f7d 100644 --- a/drivers/clk/imx/Kconfig +++ b/drivers/clk/imx/Kconfig @@ -6,8 +6,6 @@ config MXC_CLK config MXC_CLK_SCU tristate - depends on ARCH_MXC - depends on IMX_SCU && HAVE_ARM_SMCCC config CLK_IMX1 def_bool SOC_IMX1 diff --git a/drivers/clk/ingenic/cgu.c b/drivers/clk/ingenic/cgu.c index dac6edc670cce4..c8e9cb6c8e39cc 100644 --- a/drivers/clk/ingenic/cgu.c +++ b/drivers/clk/ingenic/cgu.c @@ -392,15 +392,21 @@ static unsigned int ingenic_clk_calc_hw_div(const struct ingenic_cgu_clk_info *clk_info, unsigned int div) { - unsigned int i; + unsigned int i, best_i = 0, best = (unsigned int)-1; for (i = 0; i < (1 << clk_info->div.bits) && clk_info->div.div_table[i]; i++) { - if (clk_info->div.div_table[i] >= div) - return i; + if (clk_info->div.div_table[i] >= div && + clk_info->div.div_table[i] < best) { + best = clk_info->div.div_table[i]; + best_i = i; + + if (div == best) + break; + } } - return i - 1; + return best_i; } static unsigned diff --git a/drivers/clk/meson/Kconfig b/drivers/clk/meson/Kconfig index 034da203e8e0e3..9a8a548d839d82 100644 --- a/drivers/clk/meson/Kconfig +++ b/drivers/clk/meson/Kconfig @@ -110,6 +110,7 @@ config COMMON_CLK_G12A select COMMON_CLK_MESON_AO_CLKC select COMMON_CLK_MESON_EE_CLKC select COMMON_CLK_MESON_CPU_DYNDIV + select COMMON_CLK_MESON_VID_PLL_DIV select MFD_SYSCON help Support for the clock controller on Amlogic S905D2, S905X2 and S905Y2 diff --git a/drivers/clk/mmp/clk-audio.c b/drivers/clk/mmp/clk-audio.c index eea69d498bd273..7aa7f4a9564fde 100644 --- a/drivers/clk/mmp/clk-audio.c +++ b/drivers/clk/mmp/clk-audio.c @@ -392,7 +392,8 @@ static int mmp2_audio_clk_remove(struct platform_device *pdev) return 0; } -static int __maybe_unused mmp2_audio_clk_suspend(struct device *dev) +#ifdef CONFIG_PM +static int mmp2_audio_clk_suspend(struct device *dev) { struct mmp2_audio_clk *priv = dev_get_drvdata(dev); @@ -404,7 +405,7 @@ static int __maybe_unused mmp2_audio_clk_suspend(struct device *dev) 
return 0; } -static int __maybe_unused mmp2_audio_clk_resume(struct device *dev) +static int mmp2_audio_clk_resume(struct device *dev) { struct mmp2_audio_clk *priv = dev_get_drvdata(dev); @@ -415,6 +416,7 @@ static int __maybe_unused mmp2_audio_clk_resume(struct device *dev) return 0; } +#endif static const struct dev_pm_ops mmp2_audio_clk_pm_ops = { SET_RUNTIME_PM_OPS(mmp2_audio_clk_suspend, mmp2_audio_clk_resume, NULL) diff --git a/drivers/clk/mvebu/armada-37xx-xtal.c b/drivers/clk/mvebu/armada-37xx-xtal.c index e9e306d4e9af9d..41271351cf1f4f 100644 --- a/drivers/clk/mvebu/armada-37xx-xtal.c +++ b/drivers/clk/mvebu/armada-37xx-xtal.c @@ -13,8 +13,8 @@ #include #include -#define NB_GPIO1_LATCH 0xC -#define XTAL_MODE BIT(31) +#define NB_GPIO1_LATCH 0x8 +#define XTAL_MODE BIT(9) static int armada_3700_xtal_clock_probe(struct platform_device *pdev) { diff --git a/drivers/clk/qcom/gcc-sc7180.c b/drivers/clk/qcom/gcc-sc7180.c index 68d8f7aaf64e1f..b080739ab0c33a 100644 --- a/drivers/clk/qcom/gcc-sc7180.c +++ b/drivers/clk/qcom/gcc-sc7180.c @@ -642,7 +642,7 @@ static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = { .name = "gcc_sdcc1_ice_core_clk_src", .parent_data = gcc_parent_data_0, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; @@ -666,7 +666,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = { .name = "gcc_sdcc2_apps_clk_src", .parent_data = gcc_parent_data_5, .num_parents = 5, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; diff --git a/drivers/clk/qcom/gcc-sm8250.c b/drivers/clk/qcom/gcc-sm8250.c index 6cb6617b8d88c2..ab594a0f0c4084 100644 --- a/drivers/clk/qcom/gcc-sm8250.c +++ b/drivers/clk/qcom/gcc-sm8250.c @@ -722,7 +722,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = { .name = "gcc_sdcc2_apps_clk_src", .parent_data = gcc_parent_data_4, .num_parents = 5, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; @@ -745,7 +745,7 @@ static struct clk_rcg2 gcc_sdcc4_apps_clk_src = { .name = 
"gcc_sdcc4_apps_clk_src", .parent_data = gcc_parent_data_0, .num_parents = 3, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; diff --git a/drivers/clk/renesas/r8a779a0-cpg-mssr.c b/drivers/clk/renesas/r8a779a0-cpg-mssr.c index 17ebbac7ddfb49..046d79416b7d0b 100644 --- a/drivers/clk/renesas/r8a779a0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779a0-cpg-mssr.c @@ -26,7 +26,6 @@ #include #include "renesas-cpg-mssr.h" -#include "rcar-gen3-cpg.h" enum rcar_r8a779a0_clk_types { CLK_TYPE_R8A779A0_MAIN = CLK_TYPE_CUSTOM, @@ -84,6 +83,14 @@ enum clk_ids { DEF_BASE(_name, _id, CLK_TYPE_R8A779A0_PLL2X_3X, CLK_MAIN, \ .offset = _offset) +#define DEF_MDSEL(_name, _id, _md, _parent0, _div0, _parent1, _div1) \ + DEF_BASE(_name, _id, CLK_TYPE_R8A779A0_MDSEL, \ + (_parent0) << 16 | (_parent1), \ + .div = (_div0) << 16 | (_div1), .offset = _md) + +#define DEF_OSC(_name, _id, _parent, _div) \ + DEF_BASE(_name, _id, CLK_TYPE_R8A779A0_OSC, _parent, .div = _div) + static const struct cpg_core_clk r8a779a0_core_clks[] __initconst = { /* External Clock Inputs */ DEF_INPUT("extal", CLK_EXTAL), @@ -136,8 +143,8 @@ static const struct cpg_core_clk r8a779a0_core_clks[] __initconst = { DEF_DIV6P1("canfd", R8A779A0_CLK_CANFD, CLK_PLL5_DIV4, 0x878), DEF_DIV6P1("csi0", R8A779A0_CLK_CSI0, CLK_PLL5_DIV4, 0x880), - DEF_GEN3_OSC("osc", R8A779A0_CLK_OSC, CLK_EXTAL, 8), - DEF_GEN3_MDSEL("r", R8A779A0_CLK_R, 29, CLK_EXTALR, 1, CLK_OCO, 1), + DEF_OSC("osc", R8A779A0_CLK_OSC, CLK_EXTAL, 8), + DEF_MDSEL("r", R8A779A0_CLK_R, 29, CLK_EXTALR, 1, CLK_OCO, 1), }; static const struct mssr_mod_clk r8a779a0_mod_clks[] __initconst = { diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index 5f66bf87977239..149cfde817cba7 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -389,6 +389,7 @@ static struct clk_div_table ths_div_table[] = { { .val = 1, .div = 2 }, { .val = 2, .div = 4 }, { .val = 3, .div = 6 }, + { /* Sentinel 
*/ }, }; static const char * const ths_parents[] = { "osc24M" }; static struct ccu_div ths_clk = { diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c index 6b636362379ee9..7e629a4493afde 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c @@ -322,6 +322,7 @@ static struct clk_div_table ths_div_table[] = { { .val = 1, .div = 2 }, { .val = 2, .div = 4 }, { .val = 3, .div = 6 }, + { /* Sentinel */ }, }; static SUNXI_CCU_DIV_TABLE_WITH_GATE(ths_clk, "ths", "osc24M", 0x074, 0, 2, ths_div_table, BIT(31), 0); diff --git a/drivers/clk/tegra/clk-dfll.c b/drivers/clk/tegra/clk-dfll.c index cfbaa90c7adbf6..a5f526bb0483e1 100644 --- a/drivers/clk/tegra/clk-dfll.c +++ b/drivers/clk/tegra/clk-dfll.c @@ -1856,13 +1856,13 @@ static int dfll_fetch_pwm_params(struct tegra_dfll *td) &td->reg_init_uV); if (!ret) { dev_err(td->dev, "couldn't get initialized voltage\n"); - return ret; + return -EINVAL; } ret = read_dt_param(td, "nvidia,pwm-period-nanoseconds", &pwm_period); if (!ret) { dev_err(td->dev, "couldn't get PWM period\n"); - return ret; + return -EINVAL; } td->pwm_rate = (NSEC_PER_SEC / pwm_period) * (MAX_DFLL_VOLTAGES - 1); diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h index ff7da2d3e94d85..24413812ec5b6b 100644 --- a/drivers/clk/tegra/clk-id.h +++ b/drivers/clk/tegra/clk-id.h @@ -227,6 +227,7 @@ enum clk_id { tegra_clk_sdmmc4, tegra_clk_sdmmc4_8, tegra_clk_se, + tegra_clk_se_10, tegra_clk_soc_therm, tegra_clk_soc_therm_8, tegra_clk_sor0, diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 2b2a3b81c16bab..60cc34f90cb9b4 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -630,7 +630,7 @@ static struct tegra_periph_init_data periph_clks[] = { INT8("host1x", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_HOST1X, 28, 0, tegra_clk_host1x_8), INT8("host1x", 
mux_pllc4_out1_pllc_pllc4_out2_pllp_clkm_plla_pllc4_out0, CLK_SOURCE_HOST1X, 28, 0, tegra_clk_host1x_9), INT8("se", mux_pllp_pllc2_c_c3_pllm_clkm, CLK_SOURCE_SE, 127, TEGRA_PERIPH_ON_APB, tegra_clk_se), - INT8("se", mux_pllp_pllc2_c_c3_clkm, CLK_SOURCE_SE, 127, TEGRA_PERIPH_ON_APB, tegra_clk_se), + INT8("se", mux_pllp_pllc2_c_c3_clkm, CLK_SOURCE_SE, 127, TEGRA_PERIPH_ON_APB, tegra_clk_se_10), INT8("2d", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_2D, 21, 0, tegra_clk_gr2d_8), INT8("3d", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_3D, 24, 0, tegra_clk_gr3d_8), INT8("vic03", mux_pllm_pllc_pllp_plla_pllc2_c3_clkm, CLK_SOURCE_VIC03, 178, 0, tegra_clk_vic03), diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 37244a7e68c229..9cf249c344d9e4 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -1256,6 +1256,8 @@ static struct tegra_clk_init_table init_table[] __initdata = { { TEGRA30_CLK_I2S3_SYNC, TEGRA30_CLK_CLK_MAX, 24000000, 0 }, { TEGRA30_CLK_I2S4_SYNC, TEGRA30_CLK_CLK_MAX, 24000000, 0 }, { TEGRA30_CLK_VIMCLK_SYNC, TEGRA30_CLK_CLK_MAX, 24000000, 0 }, + { TEGRA30_CLK_HDA, TEGRA30_CLK_PLL_P, 102000000, 0 }, + { TEGRA30_CLK_HDA2CODEC_2X, TEGRA30_CLK_PLL_P, 48000000, 0 }, /* must be the last entry */ { TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 0 }, }; diff --git a/drivers/clk/ti/fapll.c b/drivers/clk/ti/fapll.c index 95e36ba64accf5..8024c6d2b9e95c 100644 --- a/drivers/clk/ti/fapll.c +++ b/drivers/clk/ti/fapll.c @@ -498,6 +498,7 @@ static struct clk * __init ti_fapll_synth_setup(struct fapll_data *fd, { struct clk_init_data *init; struct fapll_synth *synth; + struct clk *clk = ERR_PTR(-ENOMEM); init = kzalloc(sizeof(*init), GFP_KERNEL); if (!init) @@ -520,13 +521,19 @@ static struct clk * __init ti_fapll_synth_setup(struct fapll_data *fd, synth->hw.init = init; synth->clk_pll = pll_clk; - return clk_register(NULL, &synth->hw); + clk = clk_register(NULL, &synth->hw); + if (IS_ERR(clk)) { + pr_err("failed to 
register clock\n"); + goto free; + } + + return clk; free: kfree(synth); kfree(init); - return ERR_PTR(-ENOMEM); + return clk; } static void __init ti_fapll_setup(struct device_node *node) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 68b087bff59cc1..2be849bb794ac3 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -654,7 +654,7 @@ config ATCPIT100_TIMER config RISCV_TIMER bool "Timer for the RISC-V platform" if COMPILE_TEST - depends on GENERIC_SCHED_CLOCK && RISCV + depends on GENERIC_SCHED_CLOCK && RISCV && RISCV_SBI select TIMER_PROBE select TIMER_OF help diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 6c3e8418014613..d0177824c518b8 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -396,10 +396,10 @@ static void erratum_set_next_event_tval_generic(const int access, unsigned long ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; if (access == ARCH_TIMER_PHYS_ACCESS) { - cval = evt + arch_counter_get_cntpct(); + cval = evt + arch_counter_get_cntpct_stable(); write_sysreg(cval, cntp_cval_el0); } else { - cval = evt + arch_counter_get_cntvct(); + cval = evt + arch_counter_get_cntvct_stable(); write_sysreg(cval, cntv_cval_el0); } @@ -822,15 +822,24 @@ static void arch_timer_evtstrm_enable(int divider) static void arch_timer_configure_evtstream(void) { - int evt_stream_div, pos; + int evt_stream_div, lsb; + + /* + * As the event stream can at most be generated at half the frequency + * of the counter, use half the frequency when computing the divider. + */ + evt_stream_div = arch_timer_rate / ARCH_TIMER_EVT_STREAM_FREQ / 2; + + /* + * Find the closest power of two to the divisor. If the adjacent bit + * of lsb (last set bit, starts from 0) is set, then we use (lsb + 1). 
+ */ + lsb = fls(evt_stream_div) - 1; + if (lsb > 0 && (evt_stream_div & BIT(lsb - 1))) + lsb++; - /* Find the closest power of two to the divisor */ - evt_stream_div = arch_timer_rate / ARCH_TIMER_EVT_STREAM_FREQ; - pos = fls(evt_stream_div); - if (pos > 1 && !(evt_stream_div & (1 << (pos - 2)))) - pos--; /* enable event stream */ - arch_timer_evtstrm_enable(min(pos, 15)); + arch_timer_evtstrm_enable(max(0, min(lsb, 15))); } static void arch_counter_set_user_access(void) diff --git a/drivers/clocksource/ingenic-timer.c b/drivers/clocksource/ingenic-timer.c index 58fd9189fab7fe..905fd6b163a819 100644 --- a/drivers/clocksource/ingenic-timer.c +++ b/drivers/clocksource/ingenic-timer.c @@ -127,7 +127,7 @@ static irqreturn_t ingenic_tcu_cevt_cb(int irq, void *dev_id) return IRQ_HANDLED; } -static struct clk * __init ingenic_tcu_get_clock(struct device_node *np, int id) +static struct clk *ingenic_tcu_get_clock(struct device_node *np, int id) { struct of_phandle_args args; diff --git a/drivers/clocksource/timer-cadence-ttc.c b/drivers/clocksource/timer-cadence-ttc.c index 80e9606020307e..4efd0cf3b602d6 100644 --- a/drivers/clocksource/timer-cadence-ttc.c +++ b/drivers/clocksource/timer-cadence-ttc.c @@ -413,10 +413,8 @@ static int __init ttc_setup_clockevent(struct clk *clk, ttcce->ttc.clk = clk; err = clk_prepare_enable(ttcce->ttc.clk); - if (err) { - kfree(ttcce); - return err; - } + if (err) + goto out_kfree; ttcce->ttc.clk_rate_change_nb.notifier_call = ttc_rate_change_clockevent_cb; @@ -426,7 +424,7 @@ static int __init ttc_setup_clockevent(struct clk *clk, &ttcce->ttc.clk_rate_change_nb); if (err) { pr_warn("Unable to register clock notifier.\n"); - return err; + goto out_kfree; } ttcce->ttc.freq = clk_get_rate(ttcce->ttc.clk); @@ -455,15 +453,17 @@ static int __init ttc_setup_clockevent(struct clk *clk, err = request_irq(irq, ttc_clock_event_interrupt, IRQF_TIMER, ttcce->ce.name, ttcce); - if (err) { - kfree(ttcce); - return err; - } + if (err) + goto out_kfree; 
clockevents_config_and_register(&ttcce->ce, ttcce->ttc.freq / PRESCALE, 1, 0xfffe); return 0; + +out_kfree: + kfree(ttcce); + return err; } static int __init ttc_timer_probe(struct platform_device *pdev) diff --git a/drivers/clocksource/timer-orion.c b/drivers/clocksource/timer-orion.c index d01ff418186766..5101e834d78fff 100644 --- a/drivers/clocksource/timer-orion.c +++ b/drivers/clocksource/timer-orion.c @@ -143,7 +143,8 @@ static int __init orion_timer_init(struct device_node *np) irq = irq_of_parse_and_map(np, 1); if (irq <= 0) { pr_err("%pOFn: unable to parse timer1 irq\n", np); - return -EINVAL; + ret = -EINVAL; + goto out_unprep_clk; } rate = clk_get_rate(clk); @@ -160,7 +161,7 @@ static int __init orion_timer_init(struct device_node *np) clocksource_mmio_readl_down); if (ret) { pr_err("Failed to initialize mmio timer\n"); - return ret; + goto out_unprep_clk; } sched_clock_register(orion_read_sched_clock, 32, rate); @@ -170,7 +171,7 @@ static int __init orion_timer_init(struct device_node *np) "orion_event", NULL); if (ret) { pr_err("%pOFn: unable to setup irq\n", np); - return ret; + goto out_unprep_clk; } ticks_per_jiffy = (clk_get_rate(clk) + HZ/2) / HZ; @@ -183,5 +184,9 @@ static int __init orion_timer_init(struct device_node *np) orion_delay_timer_init(rate); return 0; + +out_unprep_clk: + clk_disable_unprepare(clk); + return ret; } TIMER_OF_DECLARE(orion_timer, "marvell,orion-timer", orion_timer_init); diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c index 039c54a78aa576..710acc0a37044c 100644 --- a/drivers/counter/microchip-tcb-capture.c +++ b/drivers/counter/microchip-tcb-capture.c @@ -183,16 +183,20 @@ static int mchp_tc_count_action_get(struct counter_device *counter, regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], CMR), &cmr); - *action = MCHP_TC_SYNAPSE_ACTION_NONE; - - if (cmr & ATMEL_TC_ETRGEDG_NONE) + switch (cmr & ATMEL_TC_ETRGEDG) { + default: *action = MCHP_TC_SYNAPSE_ACTION_NONE; - 
else if (cmr & ATMEL_TC_ETRGEDG_RISING) + break; + case ATMEL_TC_ETRGEDG_RISING: *action = MCHP_TC_SYNAPSE_ACTION_RISING_EDGE; - else if (cmr & ATMEL_TC_ETRGEDG_FALLING) + break; + case ATMEL_TC_ETRGEDG_FALLING: *action = MCHP_TC_SYNAPSE_ACTION_FALLING_EDGE; - else if (cmr & ATMEL_TC_ETRGEDG_BOTH) + break; + case ATMEL_TC_ETRGEDG_BOTH: *action = MCHP_TC_SYNAPSE_ACTION_BOTH_EDGE; + break; + } return 0; } diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c index a60aee1a1a2915..65df9ef5b5bc05 100644 --- a/drivers/counter/ti-eqep.c +++ b/drivers/counter/ti-eqep.c @@ -235,36 +235,6 @@ static ssize_t ti_eqep_position_ceiling_write(struct counter_device *counter, return len; } -static ssize_t ti_eqep_position_floor_read(struct counter_device *counter, - struct counter_count *count, - void *ext_priv, char *buf) -{ - struct ti_eqep_cnt *priv = counter->priv; - u32 qposinit; - - regmap_read(priv->regmap32, QPOSINIT, &qposinit); - - return sprintf(buf, "%u\n", qposinit); -} - -static ssize_t ti_eqep_position_floor_write(struct counter_device *counter, - struct counter_count *count, - void *ext_priv, const char *buf, - size_t len) -{ - struct ti_eqep_cnt *priv = counter->priv; - int err; - u32 res; - - err = kstrtouint(buf, 0, &res); - if (err < 0) - return err; - - regmap_write(priv->regmap32, QPOSINIT, res); - - return len; -} - static ssize_t ti_eqep_position_enable_read(struct counter_device *counter, struct counter_count *count, void *ext_priv, char *buf) @@ -301,11 +271,6 @@ static struct counter_count_ext ti_eqep_position_ext[] = { .read = ti_eqep_position_ceiling_read, .write = ti_eqep_position_ceiling_write, }, - { - .name = "floor", - .read = ti_eqep_position_floor_read, - .write = ti_eqep_position_floor_write, - }, { .name = "enable", .read = ti_eqep_position_enable_read, diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 015ec0c0283587..1f73fa75b1a051 100644 --- a/drivers/cpufreq/Kconfig.arm +++ 
b/drivers/cpufreq/Kconfig.arm @@ -94,7 +94,7 @@ config ARM_IMX6Q_CPUFREQ tristate "Freescale i.MX6 cpufreq support" depends on ARCH_MXC depends on REGULATOR_ANATOP - select NVMEM_IMX_OCOTP + depends on NVMEM_IMX_OCOTP || COMPILE_TEST select PM_OPP help This adds cpufreq driver support for Freescale i.MX6 series SoCs. diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index 39e34f5066d3df..b0fc5e84f8570f 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -204,6 +204,12 @@ static void __exit armada_8k_cpufreq_exit(void) } module_exit(armada_8k_cpufreq_exit); +static const struct of_device_id __maybe_unused armada_8k_cpufreq_of_match[] = { + { .compatible = "marvell,ap806-cpu-clock" }, + { }, +}; +MODULE_DEVICE_TABLE(of, armada_8k_cpufreq_of_match); + MODULE_AUTHOR("Gregory Clement "); MODULE_DESCRIPTION("Armada 8K cpufreq driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c index 5a7f6dafcddb63..ac57cddc5f2fe5 100644 --- a/drivers/cpufreq/highbank-cpufreq.c +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -101,6 +101,13 @@ static int hb_cpufreq_driver_init(void) } module_init(hb_cpufreq_driver_init); +static const struct of_device_id __maybe_unused hb_cpufreq_of_match[] = { + { .compatible = "calxeda,highbank" }, + { .compatible = "calxeda,ecx-2000" }, + { }, +}; +MODULE_DEVICE_TABLE(of, hb_cpufreq_of_match); + MODULE_AUTHOR("Mark Langsdorf "); MODULE_DESCRIPTION("Calxeda Highbank cpufreq driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 36a3ccfe6d3d1c..cb95da684457f4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2207,9 +2207,9 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, unsigned int policy_min, unsigned int policy_max) { - int max_freq = intel_pstate_get_max_freq(cpu); int32_t max_policy_perf, 
min_policy_perf; int max_state, turbo_max; + int max_freq; /* * HWP needs some special consideration, because on BDX the @@ -2223,6 +2223,7 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; turbo_max = cpu->pstate.turbo_pstate; } + max_freq = max_state * cpu->pstate.scaling; max_policy_perf = max_state * policy_max / max_freq; if (policy_max == policy_min) { @@ -2325,9 +2326,18 @@ static void intel_pstate_adjust_policy_max(struct cpudata *cpu, static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, struct cpufreq_policy_data *policy) { + int max_freq; + update_turbo_state(); - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, - intel_pstate_get_max_freq(cpu)); + if (hwp_active) { + int max_state, turbo_max; + + intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state); + max_freq = max_state * cpu->pstate.scaling; + } else { + max_freq = intel_pstate_get_max_freq(cpu); + } + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, max_freq); intel_pstate_adjust_policy_max(cpu, policy); } diff --git a/drivers/cpufreq/loongson1-cpufreq.c b/drivers/cpufreq/loongson1-cpufreq.c index 0ea88778882ac9..86f612593e4974 100644 --- a/drivers/cpufreq/loongson1-cpufreq.c +++ b/drivers/cpufreq/loongson1-cpufreq.c @@ -216,6 +216,7 @@ static struct platform_driver ls1x_cpufreq_platdrv = { module_platform_driver(ls1x_cpufreq_platdrv); +MODULE_ALIAS("platform:ls1x-cpufreq"); MODULE_AUTHOR("Kelvin Cheung "); MODULE_DESCRIPTION("Loongson1 CPUFreq driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 7d1212c9b7c88a..a310372dc53e9d 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -540,6 +540,7 @@ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { } }; +MODULE_DEVICE_TABLE(of, mtk_cpufreq_machines); static int __init mtk_cpufreq_driver_init(void) { diff --git 
a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 0acc9e241cd7d6..b9ccb6a3dad98b 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -878,9 +878,9 @@ static int get_transition_latency(struct powernow_k8_data *data) /* Take a frequency, and issue the fid/vid transition command */ static int transition_frequency_fidvid(struct powernow_k8_data *data, - unsigned int index) + unsigned int index, + struct cpufreq_policy *policy) { - struct cpufreq_policy *policy; u32 fid = 0; u32 vid = 0; int res; @@ -912,9 +912,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - policy = cpufreq_cpu_get(smp_processor_id()); - cpufreq_cpu_put(policy); - cpufreq_freq_transition_begin(policy, &freqs); res = transition_fid_vid(data, fid, vid); cpufreq_freq_transition_end(policy, &freqs, res); @@ -969,7 +966,7 @@ static long powernowk8_target_fn(void *arg) powernow_k8_acpi_pst_values(data, newstate); - ret = transition_frequency_fidvid(data, newstate); + ret = transition_frequency_fidvid(data, newstate, pol); if (ret) { pr_err("transition frequency failed\n"); diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index d06b37822c3dff..fba9937a406b38 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -464,6 +464,7 @@ static const struct of_device_id qcom_cpufreq_match_list[] __initconst = { { .compatible = "qcom,msm8960", .data = &match_data_krait }, {}, }; +MODULE_DEVICE_TABLE(of, qcom_cpufreq_match_list); /* * Since the driver depends on smem and nvmem drivers, which may diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 43db05b949d956..e5140ad63db836 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -233,6 +233,7 @@ static struct platform_driver scpi_cpufreq_platdrv = { }; 
module_platform_driver(scpi_cpufreq_platdrv); +MODULE_ALIAS("platform:scpi-cpufreq"); MODULE_AUTHOR("Sudeep Holla "); MODULE_DESCRIPTION("ARM SCPI CPUFreq interface driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 4ac6fb23792a02..c40d3d7d4ea434 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -292,6 +292,13 @@ static int sti_cpufreq_init(void) } module_init(sti_cpufreq_init); +static const struct of_device_id __maybe_unused sti_cpufreq_of_match[] = { + { .compatible = "st,stih407" }, + { .compatible = "st,stih410" }, + { }, +}; +MODULE_DEVICE_TABLE(of, sti_cpufreq_of_match); + MODULE_DESCRIPTION("STMicroelectronics CPUFreq/OPP driver"); MODULE_AUTHOR("Ajitpal Singh "); MODULE_AUTHOR("Lee Jones "); diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 9907a165135b76..2deed8d8773fa6 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -167,6 +167,7 @@ static const struct of_device_id sun50i_cpufreq_match_list[] = { { .compatible = "allwinner,sun50i-h6" }, {} }; +MODULE_DEVICE_TABLE(of, sun50i_cpufreq_match_list); static const struct of_device_id *sun50i_cpufreq_match_node(void) { diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index e89b905754d211..f711d8eaea6a25 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -591,6 +591,7 @@ static struct platform_driver ve_spc_cpufreq_platdrv = { }; module_platform_driver(ve_spc_cpufreq_platdrv); +MODULE_ALIAS("platform:vexpress-spc-cpufreq"); MODULE_AUTHOR("Viresh Kumar "); MODULE_AUTHOR("Sudeep Holla "); MODULE_DESCRIPTION("Vexpress SPC ARM big LITTLE cpufreq driver"); diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 37da0c070a8832..ff5e85eefbf69d 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -366,6 
+366,7 @@ if CRYPTO_DEV_OMAP config CRYPTO_DEV_OMAP_SHAM tristate "Support for OMAP MD5/SHA1/SHA2 hw accelerator" depends on ARCH_OMAP2PLUS + select CRYPTO_ENGINE select CRYPTO_SHA1 select CRYPTO_MD5 select CRYPTO_SHA256 @@ -548,6 +549,7 @@ config CRYPTO_DEV_ATMEL_SHA config CRYPTO_DEV_ATMEL_I2C tristate + select BITREVERSE config CRYPTO_DEV_ATMEL_ECC tristate "Support for Microchip / Atmel ECC hw accelerator" diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index a94bf28f858a7f..4c5a2c11d7141d 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -262,13 +262,13 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) u32 common; u64 byte_count; __le32 *bf; - void *buf; + void *buf = NULL; int j, i, todo; int nbw = 0; u64 fill, min_fill; __be64 *bebits; __le64 *lebits; - void *result; + void *result = NULL; u64 bs; int digestsize; dma_addr_t addr_res, addr_pad; @@ -285,13 +285,17 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) /* the padding could be up to two block. 
*/ buf = kzalloc(bs * 2, GFP_KERNEL | GFP_DMA); - if (!buf) - return -ENOMEM; + if (!buf) { + err = -ENOMEM; + goto theend; + } bf = (__le32 *)buf; result = kzalloc(digestsize, GFP_KERNEL | GFP_DMA); - if (!result) - return -ENOMEM; + if (!result) { + err = -ENOMEM; + goto theend; + } flow = rctx->flow; chan = &ce->chanlist[flow]; @@ -403,11 +407,11 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) dma_unmap_sg(ce->dev, areq->src, nr_sgs, DMA_TO_DEVICE); dma_unmap_single(ce->dev, addr_res, digestsize, DMA_FROM_DEVICE); - kfree(buf); memcpy(areq->result, result, algt->alg.hash.halg.digestsize); - kfree(result); theend: + kfree(buf); + kfree(result); crypto_finalize_hash_request(engine, breq, err); return 0; } diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 981de43ea5e243..2e3690f65786d0 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -917,7 +917,7 @@ int crypto4xx_build_pd(struct crypto_async_request *req, } pd->pd_ctl.w = PD_CTL_HOST_READY | - ((crypto_tfm_alg_type(req->tfm) == CRYPTO_ALG_TYPE_AHASH) | + ((crypto_tfm_alg_type(req->tfm) == CRYPTO_ALG_TYPE_AHASH) || (crypto_tfm_alg_type(req->tfm) == CRYPTO_ALG_TYPE_AEAD) ? 
PD_CTL_HASH_FINAL : 0); pd->pd_ctl_len.w = 0x00400000 | (assoclen + datalen); diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index cf5bd7666dfcd9..8697ae53b0633c 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -3404,8 +3404,8 @@ static int caam_cra_init(struct crypto_skcipher *tfm) fallback = crypto_alloc_skcipher(tfm_name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback)) { - dev_err(ctx->jrdev, "Failed to allocate %s fallback: %ld\n", - tfm_name, PTR_ERR(fallback)); + pr_err("Failed to allocate %s fallback: %ld\n", + tfm_name, PTR_ERR(fallback)); return PTR_ERR(fallback); } diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 66f60d78bdc84f..a24ae966df4a30 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -2502,8 +2502,8 @@ static int caam_cra_init(struct crypto_skcipher *tfm) fallback = crypto_alloc_skcipher(tfm_name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback)) { - dev_err(ctx->jrdev, "Failed to allocate %s fallback: %ld\n", - tfm_name, PTR_ERR(fallback)); + pr_err("Failed to allocate %s fallback: %ld\n", + tfm_name, PTR_ERR(fallback)); return PTR_ERR(fallback); } diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 98c1ff1744bb19..a780e627838ae9 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -1611,7 +1611,8 @@ static int caam_cra_init_skcipher(struct crypto_skcipher *tfm) fallback = crypto_alloc_skcipher(tfm_name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback)) { - dev_err(ctx->dev, "Failed to allocate %s fallback: %ld\n", + dev_err(caam_alg->caam.dev, + "Failed to allocate %s fallback: %ld\n", tfm_name, PTR_ERR(fallback)); return PTR_ERR(fallback); } diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index eb2418450f120e..2e1562108a858a 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ 
b/drivers/crypto/inside-secure/safexcel.c @@ -1639,7 +1639,7 @@ static int safexcel_probe_generic(void *pdev, priv->ring[i].rdr_req = devm_kcalloc(dev, EIP197_DEFAULT_RING_SIZE, - sizeof(priv->ring[i].rdr_req), + sizeof(*priv->ring[i].rdr_req), GFP_KERNEL); if (!priv->ring[i].rdr_req) return -ENOMEM; diff --git a/drivers/crypto/marvell/cesa/cesa.h b/drivers/crypto/marvell/cesa/cesa.h index fabfaaccca8720..fa56b45620c796 100644 --- a/drivers/crypto/marvell/cesa/cesa.h +++ b/drivers/crypto/marvell/cesa/cesa.h @@ -300,11 +300,11 @@ struct mv_cesa_tdma_desc { __le32 byte_cnt; union { __le32 src; - dma_addr_t src_dma; + u32 src_dma; }; union { __le32 dst; - dma_addr_t dst_dma; + u32 dst_dma; }; __le32 next_dma; diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 4fd14d90cc4099..1b1e0ab0a831a5 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1137,7 +1137,7 @@ static int omap_aes_probe(struct platform_device *pdev) if (err < 0) { dev_err(dev, "%s: failed to get_sync(%d)\n", __func__, err); - goto err_res; + goto err_pm_disable; } omap_aes_dma_stop(dd); @@ -1246,6 +1246,7 @@ static int omap_aes_probe(struct platform_device *pdev) omap_aes_dma_cleanup(dd); err_irq: tasklet_kill(&dd->done_task); +err_pm_disable: pm_runtime_disable(dev); err_res: dd = NULL; diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index 6b9d47682d04de..52ef80efeddc6f 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c @@ -1146,7 +1146,7 @@ static int qat_hal_put_rel_rd_xfer(struct icp_qat_fw_loader_handle *handle, unsigned short mask; unsigned short dr_offset = 0x10; - status = ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES); + ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES); if (CE_INUSE_CONTEXTS & ctx_enables) { if (ctx & 0x1) { pr_err("QAT: bad 4-ctx mode,ctx=0x%x\n", ctx); diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 
66773892f665d0..a713a35dc5022d 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -460,7 +460,7 @@ DEF_TALITOS2_DONE(ch1_3, TALITOS2_ISR_CH_1_3_DONE) /* * locate current (offending) descriptor */ -static u32 current_desc_hdr(struct device *dev, int ch) +static __be32 current_desc_hdr(struct device *dev, int ch) { struct talitos_private *priv = dev_get_drvdata(dev); int tail, iter; @@ -478,7 +478,7 @@ static u32 current_desc_hdr(struct device *dev, int ch) iter = tail; while (priv->chan[ch].fifo[iter].dma_desc != cur_desc && - priv->chan[ch].fifo[iter].desc->next_desc != cur_desc) { + priv->chan[ch].fifo[iter].desc->next_desc != cpu_to_be32(cur_desc)) { iter = (iter + 1) & (priv->fifo_len - 1); if (iter == tail) { dev_err(dev, "couldn't locate current descriptor\n"); @@ -486,7 +486,7 @@ static u32 current_desc_hdr(struct device *dev, int ch) } } - if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc) { + if (priv->chan[ch].fifo[iter].desc->next_desc == cpu_to_be32(cur_desc)) { struct talitos_edesc *edesc; edesc = container_of(priv->chan[ch].fifo[iter].desc, @@ -501,13 +501,13 @@ static u32 current_desc_hdr(struct device *dev, int ch) /* * user diagnostics; report root cause of error based on execution unit status */ -static void report_eu_error(struct device *dev, int ch, u32 desc_hdr) +static void report_eu_error(struct device *dev, int ch, __be32 desc_hdr) { struct talitos_private *priv = dev_get_drvdata(dev); int i; if (!desc_hdr) - desc_hdr = in_be32(priv->chan[ch].reg + TALITOS_DESCBUF); + desc_hdr = cpu_to_be32(in_be32(priv->chan[ch].reg + TALITOS_DESCBUF)); switch (desc_hdr & DESC_HDR_SEL0_MASK) { case DESC_HDR_SEL0_AFEU: diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 27513d311242e8..de7b74505e75e7 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -367,19 +367,28 @@ void kill_dev_dax(struct dev_dax *dev_dax) } EXPORT_SYMBOL_GPL(kill_dev_dax); -static void free_dev_dax_ranges(struct dev_dax *dev_dax) +static void 
trim_dev_dax_range(struct dev_dax *dev_dax) { + int i = dev_dax->nr_range - 1; + struct range *range = &dev_dax->ranges[i].range; struct dax_region *dax_region = dev_dax->region; - int i; device_lock_assert(dax_region->dev); - for (i = 0; i < dev_dax->nr_range; i++) { - struct range *range = &dev_dax->ranges[i].range; - - __release_region(&dax_region->res, range->start, - range_len(range)); + dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i, + (unsigned long long)range->start, + (unsigned long long)range->end); + + __release_region(&dax_region->res, range->start, range_len(range)); + if (--dev_dax->nr_range == 0) { + kfree(dev_dax->ranges); + dev_dax->ranges = NULL; } - dev_dax->nr_range = 0; +} + +static void free_dev_dax_ranges(struct dev_dax *dev_dax) +{ + while (dev_dax->nr_range) + trim_dev_dax_range(dev_dax); } static void unregister_dev_dax(void *dev) @@ -804,15 +813,10 @@ static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, return 0; rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1); - if (rc) { - dev_dbg(dev, "delete range[%d]: %pa:%pa\n", dev_dax->nr_range - 1, - &alloc->start, &alloc->end); - dev_dax->nr_range--; - __release_region(res, alloc->start, resource_size(alloc)); - return rc; - } + if (rc) + trim_dev_dax_range(dev_dax); - return 0; + return rc; } static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size) @@ -885,12 +889,7 @@ static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size) if (shrink >= range_len(range)) { devm_release_action(dax_region->dev, unregister_dax_mapping, &mapping->dev); - __release_region(&dax_region->res, range->start, - range_len(range)); - dev_dax->nr_range--; - dev_dbg(dev, "delete range[%d]: %#llx:%#llx\n", i, - (unsigned long long) range->start, - (unsigned long long) range->end); + trim_dev_dax_range(dev_dax); to_shrink -= shrink; if (!to_shrink) break; @@ -1274,7 +1273,6 @@ static void dev_dax_release(struct device *dev) 
put_dax(dax_dev); free_dev_dax_id(dev_dax); dax_region_put(dax_region); - kfree(dev_dax->ranges); kfree(dev_dax->pgmap); kfree(dev_dax); } diff --git a/drivers/dax/super.c b/drivers/dax/super.c index edc279be3e5960..cadbd0a1a1ef02 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -752,6 +752,7 @@ static int __init dax_core_init(void) static void __exit dax_core_exit(void) { + dax_bus_exit(); unregister_chrdev_region(dax_devt, MINORMASK+1); ida_destroy(&dax_minor_ida); dax_fs_exit(); diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 844967f98866a5..922416b3aaceba 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -76,10 +76,6 @@ static void dma_buf_release(struct dentry *dentry) dmabuf->ops->release(dmabuf); - mutex_lock(&db_list.lock); - list_del(&dmabuf->list_node); - mutex_unlock(&db_list.lock); - if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) dma_resv_fini(dmabuf->resv); @@ -88,6 +84,22 @@ static void dma_buf_release(struct dentry *dentry) kfree(dmabuf); } +static int dma_buf_file_release(struct inode *inode, struct file *file) +{ + struct dma_buf *dmabuf; + + if (!is_dma_buf_file(file)) + return -EINVAL; + + dmabuf = file->private_data; + + mutex_lock(&db_list.lock); + list_del(&dmabuf->list_node); + mutex_unlock(&db_list.lock); + + return 0; +} + static const struct dentry_operations dma_buf_dentry_ops = { .d_dname = dmabuffs_dname, .d_release = dma_buf_release, @@ -413,6 +425,7 @@ static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file) } static const struct file_operations dma_buf_fops = { + .release = dma_buf_file_release, .mmap = dma_buf_mmap_internal, .llseek = dma_buf_llseek, .poll = dma_buf_poll, diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 1c8f2581cb09a4..1187e5e80eded5 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -200,7 +200,7 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences) max = 
max(old->shared_count + num_fences, old->shared_max * 2); } else { - max = 4; + max = max(4ul, roundup_pow_of_two(num_fences)); } new = dma_resv_list_alloc(max); diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index b971505b871523..08d71dafa00157 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -86,12 +86,12 @@ static struct dw_edma_chunk *dw_edma_alloc_chunk(struct dw_edma_desc *desc) if (desc->chunk) { /* Create and add new element into the linked list */ - desc->chunks_alloc++; - list_add_tail(&chunk->list, &desc->chunk->list); if (!dw_edma_alloc_burst(chunk)) { kfree(chunk); return NULL; } + desc->chunks_alloc++; + list_add_tail(&chunk->list, &desc->chunk->list); } else { /* List head */ chunk->burst = NULL; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 07a5db06a29ad3..fb97c9f319a555 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -379,7 +379,7 @@ int idxd_register_driver(void) return 0; drv_fail: - for (; i > 0; i--) + while (--i >= 0) driver_unregister(&idxd_drvs[i]->drv); return rc; } @@ -1639,7 +1639,7 @@ int idxd_register_bus_type(void) return 0; bus_err: - for (; i > 0; i--) + while (--i >= 0) bus_unregister(idxd_bus_types[i]); return rc; } diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c index f133ae8dece16b..6ad8afbb95f2b3 100644 --- a/drivers/dma/mediatek/mtk-hsdma.c +++ b/drivers/dma/mediatek/mtk-hsdma.c @@ -1007,6 +1007,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev) return 0; err_free: + mtk_hsdma_hw_deinit(hsdma); of_dma_controller_free(pdev->dev.of_node); err_unregister: dma_async_device_unregister(dd); diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c index 85a597228fb04b..748b260bbc976c 100644 --- a/drivers/dma/milbeaut-xdmac.c +++ b/drivers/dma/milbeaut-xdmac.c @@ -351,7 +351,7 @@ static int milbeaut_xdmac_probe(struct platform_device *pdev) ret = 
dma_async_device_register(ddev); if (ret) - return ret; + goto disable_xdmac; ret = of_dma_controller_register(dev->of_node, of_dma_simple_xlate, mdev); @@ -364,6 +364,8 @@ static int milbeaut_xdmac_probe(struct platform_device *pdev) unregister_dmac: dma_async_device_unregister(ddev); +disable_xdmac: + disable_xdmac(mdev); return ret; } diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 2753a6b916f60e..9b0d463f89bbd4 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -771,8 +771,10 @@ static int mv_xor_v2_probe(struct platform_device *pdev) goto disable_clk; msi_desc = first_msi_entry(&pdev->dev); - if (!msi_desc) + if (!msi_desc) { + ret = -ENODEV; goto free_msi_irqs; + } xor_dev->msi_desc = msi_desc; ret = devm_request_irq(&pdev->dev, msi_desc->irq, diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 82cf6c77f5c935..d3902784cae24e 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -3201,8 +3201,7 @@ static int udma_setup_resources(struct udma_dev *ud) } else if (UDMA_CAP3_UCHAN_CNT(cap3)) { ud->tpl_levels = 3; ud->tpl_start_idx[1] = UDMA_CAP3_UCHAN_CNT(cap3); - ud->tpl_start_idx[0] = ud->tpl_start_idx[1] + - UDMA_CAP3_HCHAN_CNT(cap3); + ud->tpl_start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3); } else if (UDMA_CAP3_HCHAN_CNT(cap3)) { ud->tpl_levels = 2; ud->tpl_start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3); diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 22faea653ea820..79777550a6ffc2 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2781,7 +2781,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, has_dre = false; if (!has_dre) - xdev->common.copy_align = fls(width - 1); + xdev->common.copy_align = (enum dmaengine_alignment)fls(width - 1); if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel") || of_device_is_compatible(node, "xlnx,axi-dma-mm2s-channel") || @@ -2900,7 +2900,8 @@ static int 
xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev, struct device_node *node) { - int ret, i, nr_channels = 1; + int ret, i; + u32 nr_channels = 1; ret = of_property_read_u32(node, "dma-channels", &nr_channels); if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0) @@ -3112,7 +3113,11 @@ static int xilinx_dma_probe(struct platform_device *pdev) } /* Register the DMA engine with the core */ - dma_async_device_register(&xdev->common); + err = dma_async_device_register(&xdev->common); + if (err) { + dev_err(xdev->dev, "failed to register the dma device\n"); + goto error; + } err = of_dma_controller_register(node, of_dma_xilinx_xlate, xdev); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 1362274d840b91..620f7041db6b5f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -18,6 +18,9 @@ static struct amd64_family_type *fam_type; /* Per-node stuff */ static struct ecc_settings **ecc_stngs; +/* Device for the PCI component */ +static struct device *pci_ctl_dev; + /* * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing * bandwidth to a valid bit pattern. 
The 'set' operation finds the 'matching- @@ -2683,6 +2686,9 @@ reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2) return -ENODEV; } + if (!pci_ctl_dev) + pci_ctl_dev = &pvt->F0->dev; + edac_dbg(1, "F0: %s\n", pci_name(pvt->F0)); edac_dbg(1, "F3: %s\n", pci_name(pvt->F3)); edac_dbg(1, "F6: %s\n", pci_name(pvt->F6)); @@ -2707,6 +2713,9 @@ reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2) return -ENODEV; } + if (!pci_ctl_dev) + pci_ctl_dev = &pvt->F2->dev; + edac_dbg(1, "F1: %s\n", pci_name(pvt->F1)); edac_dbg(1, "F2: %s\n", pci_name(pvt->F2)); edac_dbg(1, "F3: %s\n", pci_name(pvt->F3)); @@ -3623,21 +3632,10 @@ static void remove_one_instance(unsigned int nid) static void setup_pci_device(void) { - struct mem_ctl_info *mci; - struct amd64_pvt *pvt; - if (pci_ctl) return; - mci = edac_mc_find(0); - if (!mci) - return; - - pvt = mci->pvt_info; - if (pvt->umc) - pci_ctl = edac_pci_create_generic_ctl(&pvt->F0->dev, EDAC_MOD_STR); - else - pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR); + pci_ctl = edac_pci_create_generic_ctl(pci_ctl_dev, EDAC_MOD_STR); if (!pci_ctl) { pr_warn("%s(): Unable to create PCI control\n", __func__); pr_warn("%s(): PCI error report via EDAC not set\n", __func__); @@ -3716,6 +3714,8 @@ static int __init amd64_edac_init(void) return 0; err_pci: + pci_ctl_dev = NULL; + msrs_free(msrs); msrs = NULL; @@ -3745,6 +3745,8 @@ static void __exit amd64_edac_exit(void) kfree(ecc_stngs); ecc_stngs = NULL; + pci_ctl_dev = NULL; + msrs_free(msrs); msrs = NULL; } diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index c8d11da85becfd..7b52691c45d26c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -19,14 +20,16 @@ #define i10nm_printk(level, fmt, arg...) 
\ edac_printk(level, "i10nm", fmt, ##arg) -#define I10NM_GET_SCK_BAR(d, reg) \ +#define I10NM_GET_SCK_BAR(d, reg) \ pci_read_config_dword((d)->uracu, 0xd0, &(reg)) #define I10NM_GET_IMC_BAR(d, i, reg) \ pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg)) #define I10NM_GET_DIMMMTR(m, i, j) \ - (*(u32 *)((m)->mbase + 0x2080c + (i) * 0x4000 + (j) * 4)) + readl((m)->mbase + 0x2080c + (i) * 0x4000 + (j) * 4) #define I10NM_GET_MCDDRTCFG(m, i, j) \ - (*(u32 *)((m)->mbase + 0x20970 + (i) * 0x4000 + (j) * 4)) + readl((m)->mbase + 0x20970 + (i) * 0x4000 + (j) * 4) +#define I10NM_GET_MCMTR(m, i) \ + readl((m)->mbase + 0x20ef8 + (i) * 0x4000) #define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23) #define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12) @@ -148,7 +151,7 @@ static bool i10nm_check_ecc(struct skx_imc *imc, int chan) { u32 mcmtr; - mcmtr = *(u32 *)(imc->mbase + 0x20ef8 + chan * 0x4000); + mcmtr = I10NM_GET_MCMTR(imc, chan); edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr); return !!GET_BITFIELD(mcmtr, 2, 2); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 7f28edb070bd01..6c474fbef32af4 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1003,7 +1003,7 @@ static void decode_smca_error(struct mce *m) pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]); if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc) - decode_dram_ecc(cpu_to_node(m->extcpu), m); + decode_dram_ecc(topology_die_id(m->extcpu), m); } static inline void amd_decode_err_code(u16 ec) diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index 4a410fd2ea9aec..92af97e00828fa 100644 --- a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -1277,4 +1277,4 @@ module_platform_driver(max77693_muic_driver); MODULE_DESCRIPTION("Maxim MAX77693 Extcon driver"); MODULE_AUTHOR("Chanwoo Choi "); MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:extcon-max77693"); 
+MODULE_ALIAS("platform:max77693-muic"); diff --git a/drivers/firmware/arm_scmi/notify.c b/drivers/firmware/arm_scmi/notify.c index ce336899d63661..66196b293b6c2a 100644 --- a/drivers/firmware/arm_scmi/notify.c +++ b/drivers/firmware/arm_scmi/notify.c @@ -1474,17 +1474,17 @@ int scmi_notification_init(struct scmi_handle *handle) ni->gid = gid; ni->handle = handle; + ni->registered_protocols = devm_kcalloc(handle->dev, SCMI_MAX_PROTO, + sizeof(char *), GFP_KERNEL); + if (!ni->registered_protocols) + goto err; + ni->notify_wq = alloc_workqueue(dev_name(handle->dev), WQ_UNBOUND | WQ_FREEZABLE | WQ_SYSFS, 0); if (!ni->notify_wq) goto err; - ni->registered_protocols = devm_kcalloc(handle->dev, SCMI_MAX_PROTO, - sizeof(char *), GFP_KERNEL); - if (!ni->registered_protocols) - goto err; - mutex_init(&ni->pending_mtx); hash_init(ni->pending_events_handlers); diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c index 34f53d898acb0f..e1926483ae2fdc 100644 --- a/drivers/firmware/efi/apple-properties.c +++ b/drivers/firmware/efi/apple-properties.c @@ -3,8 +3,9 @@ * apple-properties.c - EFI device properties on Macs * Copyright (C) 2016 Lukas Wunner * - * Note, all properties are considered as u8 arrays. - * To get a value of any of them the caller must use device_property_read_u8_array(). + * Properties are stored either as: + * u8 arrays which can be retrieved with device_property_read_u8_array() or + * booleans which can be queried with device_property_present(). 
*/ #define pr_fmt(fmt) "apple-properties: " fmt @@ -88,8 +89,12 @@ static void __init unmarshal_key_value_pairs(struct dev_header *dev_header, entry_data = ptr + key_len + sizeof(val_len); entry_len = val_len - sizeof(val_len); - entry[i] = PROPERTY_ENTRY_U8_ARRAY_LEN(key, entry_data, - entry_len); + if (entry_len) + entry[i] = PROPERTY_ENTRY_U8_ARRAY_LEN(key, entry_data, + entry_len); + else + entry[i] = PROPERTY_ENTRY_BOOL(key); + if (dump_properties) { dev_info(dev, "property: %s\n", key); print_hex_dump(KERN_INFO, pr_fmt(), DUMP_PREFIX_OFFSET, diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 6c6eec044a978a..df3f9bcab581c4 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -57,6 +57,7 @@ struct mm_struct efi_mm = { .mm_rb = RB_ROOT, .mm_users = ATOMIC_INIT(2), .mm_count = ATOMIC_INIT(1), + .write_protect_seq = SEQCNT_ZERO(efi_mm.write_protect_seq), MMAP_LOCK_INITIALIZER(efi_mm) .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig index 1d2e5b85d7ca84..c027d99f2a599e 100644 --- a/drivers/firmware/imx/Kconfig +++ b/drivers/firmware/imx/Kconfig @@ -13,6 +13,7 @@ config IMX_DSP config IMX_SCU bool "IMX SCU Protocol driver" depends on IMX_MBOX + select SOC_BUS help The System Controller Firmware (SCFW) is a low-level system function which runs on a dedicated Cortex-M core to provide power, clock, and diff --git a/drivers/firmware/tegra/bpmp-debugfs.c b/drivers/firmware/tegra/bpmp-debugfs.c index c1bbba9ee93a33..440d99c63638bf 100644 --- a/drivers/firmware/tegra/bpmp-debugfs.c +++ b/drivers/firmware/tegra/bpmp-debugfs.c @@ -412,16 +412,12 @@ static int bpmp_populate_debugfs_inband(struct tegra_bpmp *bpmp, goto out; } - len = strlen(ppath) + strlen(name) + 1; + len = snprintf(pathbuf, pathlen, "%s%s/", ppath, name); if (len >= pathlen) { err = -EINVAL; goto out; } - strncpy(pathbuf, ppath, 
pathlen); - strncat(pathbuf, name, strlen(name)); - strcat(pathbuf, "/"); - err = bpmp_populate_debugfs_inband(bpmp, dentry, pathbuf); if (err < 0) diff --git a/drivers/fsi/fsi-master-aspeed.c b/drivers/fsi/fsi-master-aspeed.c index c006ec008a1aae..90dbe58ca1edca 100644 --- a/drivers/fsi/fsi-master-aspeed.c +++ b/drivers/fsi/fsi-master-aspeed.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -19,6 +20,7 @@ struct fsi_master_aspeed { struct fsi_master master; + struct mutex lock; /* protect HW access */ struct device *dev; void __iomem *base; struct clk *clk; @@ -254,6 +256,8 @@ static int aspeed_master_read(struct fsi_master *master, int link, addr |= id << 21; addr += link * FSI_HUB_LINK_SIZE; + mutex_lock(&aspeed->lock); + switch (size) { case 1: ret = opb_readb(aspeed, fsi_base + addr, val); @@ -265,14 +269,14 @@ static int aspeed_master_read(struct fsi_master *master, int link, ret = opb_readl(aspeed, fsi_base + addr, val); break; default: - return -EINVAL; + ret = -EINVAL; + goto done; } ret = check_errors(aspeed, ret); - if (ret) - return ret; - - return 0; +done: + mutex_unlock(&aspeed->lock); + return ret; } static int aspeed_master_write(struct fsi_master *master, int link, @@ -287,6 +291,8 @@ static int aspeed_master_write(struct fsi_master *master, int link, addr |= id << 21; addr += link * FSI_HUB_LINK_SIZE; + mutex_lock(&aspeed->lock); + switch (size) { case 1: ret = opb_writeb(aspeed, fsi_base + addr, *(u8 *)val); @@ -298,14 +304,14 @@ static int aspeed_master_write(struct fsi_master *master, int link, ret = opb_writel(aspeed, fsi_base + addr, *(__be32 *)val); break; default: - return -EINVAL; + ret = -EINVAL; + goto done; } ret = check_errors(aspeed, ret); - if (ret) - return ret; - - return 0; +done: + mutex_unlock(&aspeed->lock); + return ret; } static int aspeed_master_link_enable(struct fsi_master *master, int link, @@ -320,17 +326,21 @@ static int aspeed_master_link_enable(struct fsi_master *master, int link, 
reg = cpu_to_be32(0x80000000 >> bit); - if (!enable) - return opb_writel(aspeed, ctrl_base + FSI_MCENP0 + (4 * idx), - reg); + mutex_lock(&aspeed->lock); + + if (!enable) { + ret = opb_writel(aspeed, ctrl_base + FSI_MCENP0 + (4 * idx), reg); + goto done; + } ret = opb_writel(aspeed, ctrl_base + FSI_MSENP0 + (4 * idx), reg); if (ret) - return ret; + goto done; mdelay(FSI_LINK_ENABLE_SETUP_TIME); - - return 0; +done: + mutex_unlock(&aspeed->lock); + return ret; } static int aspeed_master_term(struct fsi_master *master, int link, uint8_t id) @@ -431,9 +441,11 @@ static ssize_t cfam_reset_store(struct device *dev, struct device_attribute *att { struct fsi_master_aspeed *aspeed = dev_get_drvdata(dev); + mutex_lock(&aspeed->lock); gpiod_set_value(aspeed->cfam_reset_gpio, 1); usleep_range(900, 1000); gpiod_set_value(aspeed->cfam_reset_gpio, 0); + mutex_unlock(&aspeed->lock); return count; } @@ -597,6 +609,7 @@ static int fsi_master_aspeed_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, aspeed); + mutex_init(&aspeed->lock); aspeed_master_init(aspeed); rc = fsi_master_register(&aspeed->master); diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 5d4de5cd675953..f20ac3d6942468 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -508,7 +508,8 @@ config GPIO_SAMA5D2_PIOBU config GPIO_SIFIVE bool "SiFive GPIO support" - depends on OF_GPIO && IRQ_DOMAIN_HIERARCHY + depends on OF_GPIO + select IRQ_DOMAIN_HIERARCHY select GPIO_GENERIC select GPIOLIB_IRQCHIP select REGMAP_MMIO diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 2f245594a90a60..ed7c5fc47f5244 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -660,9 +660,8 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip, spin_lock_irqsave(&mvpwm->lock, flags); - val = (unsigned long long) - readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm)); - val *= NSEC_PER_SEC; + u = readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm)); + val = (unsigned 
long long) u * NSEC_PER_SEC; do_div(val, mvpwm->clk_rate); if (val > UINT_MAX) state->duty_cycle = UINT_MAX; @@ -671,21 +670,17 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip, else state->duty_cycle = 1; - val = (unsigned long long) - readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm)); + val = (unsigned long long) u; /* on duration */ + /* period = on + off duration */ + val += readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm)); val *= NSEC_PER_SEC; do_div(val, mvpwm->clk_rate); - if (val < state->duty_cycle) { + if (val > UINT_MAX) + state->period = UINT_MAX; + else if (val) + state->period = val; + else state->period = 1; - } else { - val -= state->duty_cycle; - if (val > UINT_MAX) - state->period = UINT_MAX; - else if (val) - state->period = val; - else - state->period = 1; - } regmap_read(mvchip->regs, GPIO_BLINK_EN_OFF + mvchip->offset, &u); if (u) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index e9faeaf65d14f5..689c06cbbb457a 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -1960,6 +1960,21 @@ struct gpio_chardev_data { #endif }; +static int chipinfo_get(struct gpio_chardev_data *cdev, void __user *ip) +{ + struct gpio_device *gdev = cdev->gdev; + struct gpiochip_info chipinfo; + + memset(&chipinfo, 0, sizeof(chipinfo)); + + strscpy(chipinfo.name, dev_name(&gdev->dev), sizeof(chipinfo.name)); + strscpy(chipinfo.label, gdev->label, sizeof(chipinfo.label)); + chipinfo.lines = gdev->ngpio; + if (copy_to_user(ip, &chipinfo, sizeof(chipinfo))) + return -EFAULT; + return 0; +} + #ifdef CONFIG_GPIO_CDEV_V1 /* * returns 0 if the versions match, else the previously selected ABI version @@ -1974,6 +1989,41 @@ static int lineinfo_ensure_abi_version(struct gpio_chardev_data *cdata, return abiv; } + +static int lineinfo_get_v1(struct gpio_chardev_data *cdev, void __user *ip, + bool watch) +{ + struct gpio_desc *desc; + struct gpioline_info lineinfo; + struct gpio_v2_line_info lineinfo_v2; + + if 
(copy_from_user(&lineinfo, ip, sizeof(lineinfo))) + return -EFAULT; + + /* this doubles as a range check on line_offset */ + desc = gpiochip_get_desc(cdev->gdev->chip, lineinfo.line_offset); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + if (watch) { + if (lineinfo_ensure_abi_version(cdev, 1)) + return -EPERM; + + if (test_and_set_bit(lineinfo.line_offset, cdev->watched_lines)) + return -EBUSY; + } + + gpio_desc_to_lineinfo(desc, &lineinfo_v2); + gpio_v2_line_info_to_v1(&lineinfo_v2, &lineinfo); + + if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) { + if (watch) + clear_bit(lineinfo.line_offset, cdev->watched_lines); + return -EFAULT; + } + + return 0; +} #endif static int lineinfo_get(struct gpio_chardev_data *cdev, void __user *ip, @@ -2011,6 +2061,22 @@ static int lineinfo_get(struct gpio_chardev_data *cdev, void __user *ip, return 0; } +static int lineinfo_unwatch(struct gpio_chardev_data *cdev, void __user *ip) +{ + __u32 offset; + + if (copy_from_user(&offset, ip, sizeof(offset))) + return -EFAULT; + + if (offset >= cdev->gdev->ngpio) + return -EINVAL; + + if (!test_and_clear_bit(offset, cdev->watched_lines)) + return -EBUSY; + + return 0; +} + /* * gpio_ioctl() - ioctl handler for the GPIO chardev */ @@ -2018,80 +2084,24 @@ static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct gpio_chardev_data *cdev = file->private_data; struct gpio_device *gdev = cdev->gdev; - struct gpio_chip *gc = gdev->chip; void __user *ip = (void __user *)arg; - __u32 offset; /* We fail any subsequent ioctl():s when the chip is gone */ - if (!gc) + if (!gdev->chip) return -ENODEV; /* Fill in the struct and pass to userspace */ if (cmd == GPIO_GET_CHIPINFO_IOCTL) { - struct gpiochip_info chipinfo; - - memset(&chipinfo, 0, sizeof(chipinfo)); - - strscpy(chipinfo.name, dev_name(&gdev->dev), - sizeof(chipinfo.name)); - strscpy(chipinfo.label, gdev->label, - sizeof(chipinfo.label)); - chipinfo.lines = gdev->ngpio; - if (copy_to_user(ip, &chipinfo, 
sizeof(chipinfo))) - return -EFAULT; - return 0; + return chipinfo_get(cdev, ip); #ifdef CONFIG_GPIO_CDEV_V1 - } else if (cmd == GPIO_GET_LINEINFO_IOCTL) { - struct gpio_desc *desc; - struct gpioline_info lineinfo; - struct gpio_v2_line_info lineinfo_v2; - - if (copy_from_user(&lineinfo, ip, sizeof(lineinfo))) - return -EFAULT; - - /* this doubles as a range check on line_offset */ - desc = gpiochip_get_desc(gc, lineinfo.line_offset); - if (IS_ERR(desc)) - return PTR_ERR(desc); - - gpio_desc_to_lineinfo(desc, &lineinfo_v2); - gpio_v2_line_info_to_v1(&lineinfo_v2, &lineinfo); - - if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) - return -EFAULT; - return 0; } else if (cmd == GPIO_GET_LINEHANDLE_IOCTL) { return linehandle_create(gdev, ip); } else if (cmd == GPIO_GET_LINEEVENT_IOCTL) { return lineevent_create(gdev, ip); - } else if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL) { - struct gpio_desc *desc; - struct gpioline_info lineinfo; - struct gpio_v2_line_info lineinfo_v2; - - if (copy_from_user(&lineinfo, ip, sizeof(lineinfo))) - return -EFAULT; - - /* this doubles as a range check on line_offset */ - desc = gpiochip_get_desc(gc, lineinfo.line_offset); - if (IS_ERR(desc)) - return PTR_ERR(desc); - - if (lineinfo_ensure_abi_version(cdev, 1)) - return -EPERM; - - if (test_and_set_bit(lineinfo.line_offset, cdev->watched_lines)) - return -EBUSY; - - gpio_desc_to_lineinfo(desc, &lineinfo_v2); - gpio_v2_line_info_to_v1(&lineinfo_v2, &lineinfo); - - if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) { - clear_bit(lineinfo.line_offset, cdev->watched_lines); - return -EFAULT; - } - - return 0; + } else if (cmd == GPIO_GET_LINEINFO_IOCTL || + cmd == GPIO_GET_LINEINFO_WATCH_IOCTL) { + return lineinfo_get_v1(cdev, ip, + cmd == GPIO_GET_LINEINFO_WATCH_IOCTL); #endif /* CONFIG_GPIO_CDEV_V1 */ } else if (cmd == GPIO_V2_GET_LINEINFO_IOCTL || cmd == GPIO_V2_GET_LINEINFO_WATCH_IOCTL) { @@ -2100,16 +2110,7 @@ static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 
} else if (cmd == GPIO_V2_GET_LINE_IOCTL) { return linereq_create(gdev, ip); } else if (cmd == GPIO_GET_LINEINFO_UNWATCH_IOCTL) { - if (copy_from_user(&offset, ip, sizeof(offset))) - return -EFAULT; - - if (offset >= cdev->gdev->ngpio) - return -EINVAL; - - if (!test_and_clear_bit(offset, cdev->watched_lines)) - return -EBUSY; - - return 0; + return lineinfo_unwatch(cdev, ip); } return -EINVAL; } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 6e3c4d7a7d1467..4ad3c4b276dcf7 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1477,7 +1477,8 @@ static void gpiochip_set_irq_hooks(struct gpio_chip *gc) if (WARN_ON(gc->irq.irq_enable)) return; /* Check if the irqchip already has this hook... */ - if (irqchip->irq_enable == gpiochip_irq_enable) { + if (irqchip->irq_enable == gpiochip_irq_enable || + irqchip->irq_mask == gpiochip_irq_mask) { /* * ...and if so, give a gentle warning that this is bad * practice. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 65d1b23d7e7467..b9c11c2b2885a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1414,10 +1414,12 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) pm_runtime_put_autosuspend(connector->dev->dev); } - drm_dp_set_subconnector_property(&amdgpu_connector->base, - ret, - amdgpu_dig_connector->dpcd, - amdgpu_dig_connector->downstream_ports); + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector->connector_type == DRM_MODE_CONNECTOR_eDP) + drm_dp_set_subconnector_property(&amdgpu_connector->base, + ret, + amdgpu_dig_connector->dpcd, + amdgpu_dig_connector->downstream_ports); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 026789b466db9a..76d10f1c579bae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -80,7 +80,6 @@ MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/green_sardine_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -2524,11 +2523,11 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_remove_device(adev); - amdgpu_amdkfd_device_fini(adev); - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + amdgpu_amdkfd_device_fini(adev); + /* need to disable SMC first */ for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) @@ -3008,7 +3007,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #endif default: if (amdgpu_dc > 0) - DRM_INFO("Display Core has been requested via kernel parameter " + DRM_INFO_ONCE("Display Core has been requested via kernel parameter " "but isn't supported by ASIC, ignoring\n"); return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8e988f07f08569..0b786d8dd8bc75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1076,6 +1076,8 @@ static const struct pci_device_id pciidlist[] = { /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, + {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, + {0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, /* Navi12 */ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 8c9bacfdbc300c..c485ec86804e51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -193,10 +193,14 @@ static bool 
amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) } bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, - int queue) + int pipe, int queue) { - /* Policy: make queue 0 of each pipe as high priority compute queue */ - return (queue == 0); + bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); + int cond; + /* Policy: alternate between normal and high priority */ + cond = multipipe_policy ? pipe : queue; + + return ((cond % 2) != 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 258498cbf1ebaa..f353a5b71804e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -373,7 +373,7 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, int pipe, int queue); bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, - int queue); + int pipe, int queue); int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 3e4892b7b7d3ca..ff4e226739308c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -494,13 +494,14 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) break; } - if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) + if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) { size = 0; - else + } else { size = amdgpu_gmc_get_vbios_fb_size(adev); - if (adev->mman.keep_stolen_vga_memory) - size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION); + if (adev->mman.keep_stolen_vga_memory) + size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION); + } /* set to 0 if the pre-OS buffer uses up most of vram */ if 
((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a6dbe4b83533f7..2f47f81a74a570 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1283,8 +1283,12 @@ static int psp_hdcp_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->hdcp_context.hdcp_initialized) - return 0; + if (!psp->hdcp_context.hdcp_initialized) { + if (psp->hdcp_context.hdcp_shared_buf) + goto out; + else + return 0; + } ret = psp_hdcp_unload(psp); if (ret) @@ -1292,6 +1296,7 @@ static int psp_hdcp_terminate(struct psp_context *psp) psp->hdcp_context.hdcp_initialized = false; +out: /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, &psp->hdcp_context.hdcp_shared_mc_addr, @@ -1430,8 +1435,12 @@ static int psp_dtm_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->dtm_context.dtm_initialized) - return 0; + if (!psp->dtm_context.dtm_initialized) { + if (psp->dtm_context.dtm_shared_buf) + goto out; + else + return 0; + } ret = psp_dtm_unload(psp); if (ret) @@ -1439,6 +1448,7 @@ static int psp_dtm_terminate(struct psp_context *psp) psp->dtm_context.dtm_initialized = false; +out: /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, &psp->dtm_context.dtm_shared_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 55f4b8c3b93383..4ebb43e090999f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4334,7 +4334,8 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? 
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue) ? AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, @@ -6360,7 +6361,8 @@ static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 94b7e0531d0928..c36258d56b4455 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1915,7 +1915,8 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue) ? 
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, @@ -4433,7 +4434,8 @@ static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *m struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0d8e203b10efb2..957c12b727676a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2228,7 +2228,8 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue) ? 
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ return amdgpu_ring_init(adev, ring, 1024, @@ -3383,7 +3384,9 @@ static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *m struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, + ring->pipe, + ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 4137dc710aafd9..7ad0434be293ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -47,7 +47,7 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ GFX_CTRL_CMD_ID_GBR_IH_SET = 0x00080000, /* set Gbr IH_RB_CNTL registers */ - GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ + GFX_CTRL_CMD_ID_CONSUME_CMD = 0x00090000, /* send interrupt to psp for updating write pointer of vf */ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f57c5f57efa8a9..41cd108214d6d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1242,7 +1242,8 @@ static int soc15_common_early_init(void *handle) break; case CHIP_RENOIR: adev->asic_funcs = &soc15_asic_funcs; - if (adev->pdev->device == 0x1636) + if ((adev->pdev->device == 0x1636) || + (adev->pdev->device == 0x164c)) adev->apu_flags |= AMD_APU_IS_RENOIR; else adev->apu_flags |= 
AMD_APU_IS_GREEN_SARDINE; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 3de5e14c5ae316..31d793ee0836e1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -774,6 +774,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) struct acpi_table_header *crat_table; acpi_status status; void *pcrat_image; + int rc = 0; if (!crat_image) return -EINVAL; @@ -798,14 +799,17 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) } pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL); - if (!pcrat_image) - return -ENOMEM; + if (!pcrat_image) { + rc = -ENOMEM; + goto out; + } memcpy(pcrat_image, crat_table, crat_table->length); *crat_image = pcrat_image; *size = crat_table->length; - - return 0; +out: + acpi_put_table(crat_table); + return rc; } /* Memory required to create Virtual CRAT. @@ -988,6 +992,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) CRAT_OEMID_LENGTH); memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, CRAT_OEMTABLEID_LENGTH); + acpi_put_table(acpi_table); } crat_table->total_entries = 0; crat_table->num_domains = 0; @@ -1029,11 +1034,14 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) (struct crat_subtype_iolink *)sub_type_hdr); if (ret < 0) return ret; - crat_table->length += (sub_type_hdr->length * entries); - crat_table->total_entries += entries; - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + - sub_type_hdr->length * entries); + if (entries) { + crat_table->length += (sub_type_hdr->length * entries); + crat_table->total_entries += entries; + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length * entries); + } #else pr_info("IO link not available for non x86 platforms\n"); #endif diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 60dfdd432aba0e..3c410d236c4919 100644 --- 
a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,7 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON)) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index d0699e98db9292..e00a30e7d25294 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -113,7 +113,7 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, mutex_lock(&adev->dm.dc_lock); /* Enable CRTC CRC generation if necessary. */ - if (dm_is_crc_source_crtc(source)) { + if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { if (!dc_stream_configure_crc(stream_state->ctx->dc, stream_state, enable, enable)) { ret = -EINVAL; diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 64f515d7441031..4674aca8f20697 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -33,10 +33,6 @@ ifdef CONFIG_PPC64 calcs_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -calcs_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 @@ -57,9 +53,6 @@ endif CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calcs.o := $(calcs_ccflags) CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calc_auto.o := $(calcs_ccflags) CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calc_math.o := $(calcs_ccflags) -Wno-tautological-compare 
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/calcs/dcn_calcs.o := $(calcs_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/calcs/dcn_calc_auto.o := $(calcs_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/calcs/dcn_calc_math.o := $(calcs_rcflags) BW_CALCS = dce_calcs.o bw_fixed.o custom_float.o diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 1a495759a03435..52b1ce775a1e8f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -104,13 +104,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 6b431db146cd97..1c6e401dd4ccee 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -704,24 +704,24 @@ static struct wm_table ddr4_wm_table_rn = { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, + .sr_exit_time_us = 11.12, + .sr_enter_plus_exit_time_us = 12.48, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, + .sr_exit_time_us = 11.12, + .sr_enter_plus_exit_time_us = 12.48, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 
10.12, - .sr_enter_plus_exit_time_us = 11.48, + .sr_exit_time_us = 11.12, + .sr_enter_plus_exit_time_us = 12.48, .valid = true, }, } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index b0e9b0509568c5..95d883482227e5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -239,6 +239,7 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; bool force_reset = false; bool update_uclk = false; + bool p_state_change_support; if (dc->work_arounds.skip_clock_update || !clk_mgr->smu_present) return; @@ -279,8 +280,9 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz; clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; - if (should_update_pstate_support(safe_to_lower, new_clocks->p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { - clk_mgr_base->clks.p_state_change_support = new_clocks->p_state_change_support; + p_state_change_support = new_clocks->p_state_change_support || (display_count == 0); + if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { + clk_mgr_base->clks.p_state_change_support = p_state_change_support; /* to disable P-State switching, set UCLK min = max */ if (!clk_mgr_base->clks.p_state_change_support) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 5b0cedfa824a9d..e1e5d81a5e4386 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2471,9 +2471,14 @@ enum dc_status dc_link_validate_mode_timing( static struct abm *get_abm_from_stream_res(const struct dc_link *link) { int i; - struct dc *dc = 
link->ctx->dc; + struct dc *dc = NULL; struct abm *abm = NULL; + if (!link || !link->ctx) + return NULL; + + dc = link->ctx->dc; + for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i]; struct dc_stream_state *stream = pipe_ctx.stream; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index ff1e9963ec7a2b..17e6fd82013956 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2375,6 +2375,9 @@ static bool decide_dp_link_settings(struct dc_link *link, struct dc_link_setting initial_link_setting; uint32_t link_bw; + if (req_bw > dc_link_bandwidth_kbps(link, &link->verified_link_cap)) + return false; + /* search for the minimum link setting that: * 1. is supported according to the link training result * 2. could support the b/w requested by the timing @@ -3020,14 +3023,14 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off && - pipe_ctx->stream->link == link) + pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) core_link_disable_stream(pipe_ctx); } for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off && - pipe_ctx->stream->link == link) + pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) core_link_enable_stream(link->dc->current_state, pipe_ctx); } @@ -4230,7 +4233,7 @@ void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode) if (edp_config_set.bits.PANEL_MODE_EDP != panel_mode_edp) { - enum ddc_result result = DDC_RESULT_UNKNOWN; + enum dc_status result = DC_ERROR_UNEXPECTED; edp_config_set.bits.PANEL_MODE_EDP = panel_mode_edp; @@ -4240,7 +4243,7 @@ void 
dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode) &edp_config_set.raw, sizeof(edp_config_set.raw)); - ASSERT(result == DDC_RESULT_SUCESSFULL); + ASSERT(result == DC_OK); } } DC_LOG_DETECTION_DP_CAPS("Link: %d eDP panel mode supported: %d " diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index b409f6b2bfd832..210466b2d8631f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -119,7 +119,8 @@ static const struct link_encoder_funcs dce110_lnk_enc_funcs = { .disable_hpd = dce110_link_encoder_disable_hpd, .is_dig_enabled = dce110_is_dig_enabled, .destroy = dce110_link_encoder_destroy, - .get_max_link_cap = dce110_link_encoder_get_max_link_cap + .get_max_link_cap = dce110_link_encoder_get_max_link_cap, + .get_dig_frontend = dce110_get_dig_frontend, }; static enum bp_result link_transmitter_control( @@ -235,6 +236,44 @@ static void set_link_training_complete( } +unsigned int dce110_get_dig_frontend(struct link_encoder *enc) +{ + struct dce110_link_encoder *enc110 = TO_DCE110_LINK_ENC(enc); + u32 value; + enum engine_id result; + + REG_GET(DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, &value); + + switch (value) { + case DCE110_DIG_FE_SOURCE_SELECT_DIGA: + result = ENGINE_ID_DIGA; + break; + case DCE110_DIG_FE_SOURCE_SELECT_DIGB: + result = ENGINE_ID_DIGB; + break; + case DCE110_DIG_FE_SOURCE_SELECT_DIGC: + result = ENGINE_ID_DIGC; + break; + case DCE110_DIG_FE_SOURCE_SELECT_DIGD: + result = ENGINE_ID_DIGD; + break; + case DCE110_DIG_FE_SOURCE_SELECT_DIGE: + result = ENGINE_ID_DIGE; + break; + case DCE110_DIG_FE_SOURCE_SELECT_DIGF: + result = ENGINE_ID_DIGF; + break; + case DCE110_DIG_FE_SOURCE_SELECT_DIGG: + result = ENGINE_ID_DIGG; + break; + default: + // invalid source select DIG + result = ENGINE_ID_UNKNOWN; + } + + return result; +} + void dce110_link_encoder_set_dp_phy_pattern_training_pattern( struct 
link_encoder *enc, uint32_t index) @@ -1665,7 +1704,8 @@ static const struct link_encoder_funcs dce60_lnk_enc_funcs = { .disable_hpd = dce110_link_encoder_disable_hpd, .is_dig_enabled = dce110_is_dig_enabled, .destroy = dce110_link_encoder_destroy, - .get_max_link_cap = dce110_link_encoder_get_max_link_cap + .get_max_link_cap = dce110_link_encoder_get_max_link_cap, + .get_dig_frontend = dce110_get_dig_frontend }; void dce60_link_encoder_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h index cb714a48b171c3..fc6ade824c231a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h @@ -295,6 +295,8 @@ void dce110_link_encoder_connect_dig_be_to_fe( enum engine_id engine, bool connect); +unsigned int dce110_get_dig_frontend(struct link_encoder *enc); + void dce110_link_encoder_set_dp_phy_pattern_training_pattern( struct link_encoder *enc, uint32_t index); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 733e6e6e43bd65..62ad1a11bff9c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -31,11 +31,4 @@ DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \ AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) -# fix: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# aarch64 does not support soft-float, so use hard-float and handle this in code -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn10/dcn10_resource.o := -mgeneral-regs-only -endif - AMD_DISPLAY_FILES += $(AMD_DAL_DCN10) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index d0f3bf953d0273..0d1e7b56fb3954 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -646,8 +646,13 @@ static void power_on_plane( if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - hws->funcs.dpp_pg_control(hws, plane_id, true); - hws->funcs.hubp_pg_control(hws, plane_id, true); + + if (hws->funcs.dpp_pg_control) + hws->funcs.dpp_pg_control(hws, plane_id, true); + + if (hws->funcs.hubp_pg_control) + hws->funcs.hubp_pg_control(hws, plane_id, true); + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); DC_LOG_DEBUG( @@ -1079,8 +1084,13 @@ void dcn10_plane_atomic_power_down(struct dc *dc, if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - hws->funcs.dpp_pg_control(hws, dpp->inst, false); - hws->funcs.hubp_pg_control(hws, hubp->inst, false); + + if (hws->funcs.dpp_pg_control) + hws->funcs.dpp_pg_control(hws, dpp->inst, false); + + if (hws->funcs.hubp_pg_control) + hws->funcs.hubp_pg_control(hws, hubp->inst, false); + dpp->funcs->dpp_reset(dpp); REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index a78712caf1244f..0a01be38ee1b8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -608,8 +608,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_pplib_clock_request = false, .disable_pplib_wm_range = false, .pplib_wm_report_mode = WM_REPORT_DEFAULT, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = true, + .pipe_split_policy = MPC_SPLIT_AVOID, + .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .voltage_align_fclk = true, .disable_stereo_support = true, @@ -1339,47 +1339,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx) return value; } -/* - * Some architectures don't support soft-float (e.g. 
aarch64), on those - * this function has to be called with hardfloat enabled, make sure not - * to inline it so whatever fp stuff is done stays inside - */ -static noinline void dcn10_resource_construct_fp( - struct dc *dc) -{ - if (dc->ctx->dce_version == DCN_VERSION_1_01) { - struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc; - struct dcn_ip_params *dcn_ip = dc->dcn_ip; - struct display_mode_lib *dml = &dc->dml; - - dml->ip.max_num_dpp = 3; - /* TODO how to handle 23.84? */ - dcn_soc->dram_clock_change_latency = 23; - dcn_ip->max_num_dpp = 3; - } - if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) { - dc->dcn_soc->urgent_latency = 3; - dc->debug.disable_dmcu = true; - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f; - } - - - dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width; - ASSERT(dc->dcn_soc->number_of_channels < 3); - if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/ - dc->dcn_soc->number_of_channels = 2; - - if (dc->dcn_soc->number_of_channels == 1) { - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f; - dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f; - dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f; - dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f; - if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) { - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f; - } - } -} - static bool dcn10_resource_construct( uint8_t num_virtual_links, struct dc *dc, @@ -1531,15 +1490,37 @@ static bool dcn10_resource_construct( memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); -#if defined(CONFIG_ARM64) - /* Aarch64 does not support -msoft-float/-mfloat-abi=soft */ - DC_FP_START(); - dcn10_resource_construct_fp(dc); - DC_FP_END(); -#else - /* Other architectures we build for build this with soft-float */ - dcn10_resource_construct_fp(dc); -#endif + if (dc->ctx->dce_version == 
DCN_VERSION_1_01) { + struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc; + struct dcn_ip_params *dcn_ip = dc->dcn_ip; + struct display_mode_lib *dml = &dc->dml; + + dml->ip.max_num_dpp = 3; + /* TODO how to handle 23.84? */ + dcn_soc->dram_clock_change_latency = 23; + dcn_ip->max_num_dpp = 3; + } + if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) { + dc->dcn_soc->urgent_latency = 3; + dc->debug.disable_dmcu = true; + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f; + } + + + dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width; + ASSERT(dc->dcn_soc->number_of_channels < 3); + if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/ + dc->dcn_soc->number_of_channels = 2; + + if (dc->dcn_soc->number_of_channels == 1) { + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f; + dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f; + dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f; + dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f; + if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) { + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f; + } + } pool->base.pp_smu = dcn10_pp_smu_create(ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 624cb1341ef145..5fcaf78334ff9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -17,10 +17,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 01530e686f4371..f1e9b3b06b924d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1069,8 +1069,13 @@ static void dcn20_power_on_plane( if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - dcn20_dpp_pg_control(hws, pipe_ctx->plane_res.dpp->inst, true); - dcn20_hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, true); + + if (hws->funcs.dpp_pg_control) + hws->funcs.dpp_pg_control(hws, pipe_ctx->plane_res.dpp->inst, true); + + if (hws->funcs.hubp_pg_control) + hws->funcs.hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, true); + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); DC_LOG_DEBUG( diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index d50a9c37063729..a92f6e4b2eb8ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2520,8 +2520,7 @@ struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, * if this primary pipe has a bottom pipe in prev. state * and if the bottom pipe is still available (which it should be), * pick that pipe as secondary - * Same logic applies for ODM pipes. Since mpo is not allowed with odm - * check in else case. 
+ * Same logic applies for ODM pipes */ if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe) { preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe->pipe_idx; @@ -2529,7 +2528,9 @@ struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; secondary_pipe->pipe_idx = preferred_pipe_idx; } - } else if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) { + } + if (secondary_pipe == NULL && + dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) { preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe->pipe_idx; if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 51a2f3d4c194b1..07684d3e375abd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -13,10 +13,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index e73785e74cba8b..20441127783ba1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -295,7 +295,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .num_banks = 8, .num_chans = 4, .vmm_page_size_bytes = 4096, - .dram_clock_change_latency_us = 23.84, + .dram_clock_change_latency_us = 11.72, .return_bus_width_bytes = 64, .dispclk_dppclk_vco_speed_mhz = 3600, .xfc_bus_transport_time_us = 4, 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index dbc7e2abe3795b..417331438c3061 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -33,10 +33,6 @@ ifdef CONFIG_PPC64 dml_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -dml_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 @@ -64,13 +60,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags) endif ifdef CONFIG_DRM_AMD_DC_DCN3_0 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) -Wframe-larger-than=2048 @@ -78,8 +67,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) endif CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags) DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \ 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 860e72a51534cc..80170f9721ce94 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -2635,14 +2635,15 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP } if (mode_lib->vba.DRAMClockChangeSupportsVActive && - mode_lib->vba.MinActiveDRAMClockChangeMargin > 60 && - mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) { mode_lib->vba.DRAMClockChangeWatermark += 25; for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.DRAMClockChangeWatermark > - dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) - mode_lib->vba.MinTTUVBlank[k] += 25; + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + if (mode_lib->vba.DRAMClockChangeWatermark > + dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) + mode_lib->vba.MinTTUVBlank[k] += 25; + } } mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index f2624a1156e5c8..ea29cf95d470b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -10,10 +10,6 @@ ifdef CONFIG_PPC64 dsc_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -dsc_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 @@ -32,7 +28,6 @@ endif endif CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_rcflags) DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o diff --git 
a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index 95cb56929e79e7..126c2f3a4dd3b2 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -55,10 +55,6 @@ #include #define DC_FP_START() kernel_fpu_begin() #define DC_FP_END() kernel_fpu_end() -#elif defined(CONFIG_ARM64) -#include -#define DC_FP_START() kernel_neon_begin() -#define DC_FP_END() kernel_neon_end() #elif defined(CONFIG_PPC64) #include #include diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index b8695660b480e9..09bc2c249e1af4 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1614,7 +1614,7 @@ static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma struct pwl_float_data_ex *rgb = rgb_regamma; const struct hw_x_point *coord_x = coordinates_x; - build_coefficients(&coeff, true); + build_coefficients(&coeff, TRANSFER_FUNCTION_SRGB); i = 0; while (i != hw_points_num + 1) { diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 44fd0cd069de67..95d0f18ed0c567 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -575,6 +575,7 @@ struct pptable_funcs { int (*conv_power_profile_to_pplib_workload)(int power_profile); uint32_t (*get_fan_control_mode)(struct smu_context *smu); int (*set_fan_control_mode)(struct smu_context *smu, uint32_t mode); + int (*set_fan_speed_percent)(struct smu_context *smu, uint32_t speed); int (*set_fan_speed_rpm)(struct smu_context *smu, uint32_t speed); int (*set_xgmi_pstate)(struct smu_context *smu, uint32_t pstate); int (*gfx_off_control)(struct smu_context *smu, bool enable); diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index 2d1c3babaa3a0f..0046f1c26fc2da 100644 --- 
a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -200,6 +200,9 @@ int smu_v11_0_set_fan_control_mode(struct smu_context *smu, uint32_t mode); +int +smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed); + int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index b1e5ec01527b82..5cc45b1cff7e79 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2255,19 +2255,14 @@ int smu_get_fan_speed_percent(struct smu_context *smu, uint32_t *speed) int smu_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) { int ret = 0; - uint32_t rpm; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; mutex_lock(&smu->mutex); - if (smu->ppt_funcs->set_fan_speed_rpm) { - if (speed > 100) - speed = 100; - rpm = speed * smu->fan_max_rpm / 100; - ret = smu->ppt_funcs->set_fan_speed_rpm(smu, rpm); - } + if (smu->ppt_funcs->set_fan_speed_percent) + ret = smu->ppt_funcs->set_fan_speed_percent(smu, speed); mutex_unlock(&smu->mutex); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index fc376281e629a0..1c526cb239e035 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -2366,6 +2366,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index ef1a62e86a0ee2..f2c8719b8395ea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2710,6 +2710,7 @@ static const struct pptable_funcs navi10_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index cf7c4f0e0a0b51..31da8fae6fa9d6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2776,6 +2776,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 6db96fa1df0926..e646f5931d7953 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1122,6 +1122,35 @@ smu_v11_0_set_fan_static_mode(struct smu_context *smu, uint32_t mode) return 0; } +int +smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t duty100, duty; + uint64_t tmp64; + + if (speed > 100) + speed = 
100; + + if (smu_v11_0_auto_fan_control(smu, 0)) + return -EINVAL; + + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), + CG_FDO_CTRL1, FMAX_DUTY100); + if (!duty100) + return -EINVAL; + + tmp64 = (uint64_t)speed * duty100; + do_div(tmp64, 100); + duty = (uint32_t)tmp64; + + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL0), + CG_FDO_CTRL0, FDO_STATIC_DUTY, duty)); + + return smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC); +} + int smu_v11_0_set_fan_control_mode(struct smu_context *smu, uint32_t mode) @@ -1130,7 +1159,7 @@ smu_v11_0_set_fan_control_mode(struct smu_context *smu, switch (mode) { case AMD_FAN_CTRL_NONE: - ret = smu_v11_0_set_fan_speed_rpm(smu, smu->fan_max_rpm); + ret = smu_v11_0_set_fan_speed_percent(smu, 100); break; case AMD_FAN_CTRL_MANUAL: ret = smu_v11_0_auto_fan_control(smu, 0); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 66c1026489bee5..425c48e100e4f7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -188,6 +188,7 @@ static int renoir_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type return -EINVAL; *freq = clk_table->SocClocks[dpm_level].Freq; break; + case SMU_UCLK: case SMU_MCLK: if (dpm_level >= NUM_FCLK_DPM_LEVELS) return -EINVAL; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 660f403d5770ce..7907c9e0b5decc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -222,6 +222,7 @@ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ break; case SMU_FCLK: case SMU_MCLK: + case SMU_UCLK: ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min, NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/aspeed/Kconfig b/drivers/gpu/drm/aspeed/Kconfig index 
018383cfcfa790..5e95bcea43e92c 100644 --- a/drivers/gpu/drm/aspeed/Kconfig +++ b/drivers/gpu/drm/aspeed/Kconfig @@ -3,6 +3,7 @@ config DRM_ASPEED_GFX tristate "ASPEED BMC Display Controller" depends on DRM && OF depends on (COMPILE_TEST || ARCH_ASPEED) + depends on MMU select DRM_KMS_HELPER select DRM_KMS_CMA_HELPER select DMA_CMA if HAVE_DMA_CONTIGUOUS diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 33fd33f953ec46..89558e5815303c 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -168,6 +169,7 @@ struct sii902x { struct drm_connector connector; struct gpio_desc *reset_gpio; struct i2c_mux_core *i2cmux; + struct regulator_bulk_data supplies[2]; /* * Mutex protects audio and video functions from interfering * each other, by keeping their i2c command sequences atomic. @@ -954,41 +956,13 @@ static const struct drm_bridge_timings default_sii902x_timings = { | DRM_BUS_FLAG_DE_HIGH, }; -static int sii902x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int sii902x_init(struct sii902x *sii902x) { - struct device *dev = &client->dev; + struct device *dev = &sii902x->i2c->dev; unsigned int status = 0; - struct sii902x *sii902x; u8 chipid[4]; int ret; - ret = i2c_check_functionality(client->adapter, - I2C_FUNC_SMBUS_BYTE_DATA); - if (!ret) { - dev_err(dev, "I2C adapter not suitable\n"); - return -EIO; - } - - sii902x = devm_kzalloc(dev, sizeof(*sii902x), GFP_KERNEL); - if (!sii902x) - return -ENOMEM; - - sii902x->i2c = client; - sii902x->regmap = devm_regmap_init_i2c(client, &sii902x_regmap_config); - if (IS_ERR(sii902x->regmap)) - return PTR_ERR(sii902x->regmap); - - sii902x->reset_gpio = devm_gpiod_get_optional(dev, "reset", - GPIOD_OUT_LOW); - if (IS_ERR(sii902x->reset_gpio)) { - dev_err(dev, "Failed to retrieve/request reset gpio: %ld\n", - PTR_ERR(sii902x->reset_gpio)); - return 
PTR_ERR(sii902x->reset_gpio); - } - - mutex_init(&sii902x->mutex); - sii902x_reset(sii902x); ret = regmap_write(sii902x->regmap, SII902X_REG_TPI_RQB, 0x0); @@ -1012,11 +986,11 @@ static int sii902x_probe(struct i2c_client *client, regmap_read(sii902x->regmap, SII902X_INT_STATUS, &status); regmap_write(sii902x->regmap, SII902X_INT_STATUS, status); - if (client->irq > 0) { + if (sii902x->i2c->irq > 0) { regmap_write(sii902x->regmap, SII902X_INT_ENABLE, SII902X_HOTPLUG_EVENT); - ret = devm_request_threaded_irq(dev, client->irq, NULL, + ret = devm_request_threaded_irq(dev, sii902x->i2c->irq, NULL, sii902x_interrupt, IRQF_ONESHOT, dev_name(dev), sii902x); @@ -1031,9 +1005,9 @@ static int sii902x_probe(struct i2c_client *client, sii902x_audio_codec_init(sii902x, dev); - i2c_set_clientdata(client, sii902x); + i2c_set_clientdata(sii902x->i2c, sii902x); - sii902x->i2cmux = i2c_mux_alloc(client->adapter, dev, + sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev, 1, 0, I2C_MUX_GATE, sii902x_i2c_bypass_select, sii902x_i2c_bypass_deselect); @@ -1044,6 +1018,62 @@ static int sii902x_probe(struct i2c_client *client, return i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); } +static int sii902x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct sii902x *sii902x; + int ret; + + ret = i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA); + if (!ret) { + dev_err(dev, "I2C adapter not suitable\n"); + return -EIO; + } + + sii902x = devm_kzalloc(dev, sizeof(*sii902x), GFP_KERNEL); + if (!sii902x) + return -ENOMEM; + + sii902x->i2c = client; + sii902x->regmap = devm_regmap_init_i2c(client, &sii902x_regmap_config); + if (IS_ERR(sii902x->regmap)) + return PTR_ERR(sii902x->regmap); + + sii902x->reset_gpio = devm_gpiod_get_optional(dev, "reset", + GPIOD_OUT_LOW); + if (IS_ERR(sii902x->reset_gpio)) { + dev_err(dev, "Failed to retrieve/request reset gpio: %ld\n", + PTR_ERR(sii902x->reset_gpio)); + return 
PTR_ERR(sii902x->reset_gpio); + } + + mutex_init(&sii902x->mutex); + + sii902x->supplies[0].supply = "iovcc"; + sii902x->supplies[1].supply = "cvcc12"; + ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(sii902x->supplies), + sii902x->supplies); + if (ret < 0) + return ret; + + ret = regulator_bulk_enable(ARRAY_SIZE(sii902x->supplies), + sii902x->supplies); + if (ret < 0) { + dev_err_probe(dev, ret, "Failed to enable supplies"); + return ret; + } + + ret = sii902x_init(sii902x); + if (ret < 0) { + regulator_bulk_disable(ARRAY_SIZE(sii902x->supplies), + sii902x->supplies); + } + + return ret; +} + static int sii902x_remove(struct i2c_client *client) { @@ -1051,6 +1081,8 @@ static int sii902x_remove(struct i2c_client *client) i2c_mux_del_adapters(sii902x->i2cmux); drm_bridge_remove(&sii902x->bridge); + regulator_bulk_disable(ARRAY_SIZE(sii902x->supplies), + sii902x->supplies); return 0; } diff --git a/drivers/gpu/drm/bridge/ti-tpd12s015.c b/drivers/gpu/drm/bridge/ti-tpd12s015.c index 514cbf0eac75af..e0e015243a602d 100644 --- a/drivers/gpu/drm/bridge/ti-tpd12s015.c +++ b/drivers/gpu/drm/bridge/ti-tpd12s015.c @@ -160,7 +160,7 @@ static int tpd12s015_probe(struct platform_device *pdev) /* Register the IRQ if the HPD GPIO is IRQ-capable. 
*/ tpd->hpd_irq = gpiod_to_irq(tpd->hpd_gpio); - if (tpd->hpd_irq) { + if (tpd->hpd_irq >= 0) { ret = devm_request_threaded_irq(&pdev->dev, tpd->hpd_irq, NULL, tpd12s015_hpd_isr, IRQF_TRIGGER_RISING | diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index f9170b4b22e7e8..8a871e5c3e26ba 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -3007,7 +3007,7 @@ int drm_atomic_helper_set_config(struct drm_mode_set *set, ret = handle_conflicting_encoders(state, true); if (ret) - return ret; + goto fail; ret = drm_atomic_commit(state); diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c index 2510717d5a08fa..e25181bf2c480c 100644 --- a/drivers/gpu/drm/drm_dp_aux_dev.c +++ b/drivers/gpu/drm/drm_dp_aux_dev.c @@ -63,7 +63,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index) mutex_lock(&aux_idr_mutex); aux_dev = idr_find(&aux_idr, index); - if (!kref_get_unless_zero(&aux_dev->refcount)) + if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount)) aux_dev = NULL; mutex_unlock(&aux_idr_mutex); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 631125b46e04c6..b7ddf504e0249d 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3102,6 +3102,8 @@ static int drm_cvt_modes(struct drm_connector *connector, height = (cvt->code[0] + ((cvt->code[1] & 0xf0) << 4) + 1) * 2; switch (cvt->code[1] & 0x0c) { + /* default - because compiler doesn't see that we've enumerated all cases */ + default: case 0x00: width = height * 4 / 3; break; diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 6e74e6745ecaeb..3491460498491d 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -388,19 +388,18 @@ int drm_syncobj_find_fence(struct drm_file *file_private, return -ENOENT; *fence = drm_syncobj_fence_get(syncobj); - drm_syncobj_put(syncobj); if (*fence) { ret = 
dma_fence_chain_find_seqno(fence, point); if (!ret) - return 0; + goto out; dma_fence_put(*fence); } else { ret = -EINVAL; } if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)) - return ret; + goto out; memset(&wait, 0, sizeof(wait)); wait.task = current; @@ -432,6 +431,9 @@ int drm_syncobj_find_fence(struct drm_file *file_private, if (wait.node.next) drm_syncobj_remove_wait(syncobj, &wait); +out: + drm_syncobj_put(syncobj); + return ret; } EXPORT_SYMBOL(drm_syncobj_find_fence); diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c index 720a767118c9cd..deb4fd13591d24 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_dp.c +++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c @@ -2083,7 +2083,7 @@ cdv_intel_dp_init(struct drm_device *dev, struct psb_intel_mode_device *mode_dev DRM_INFO("failed to retrieve link info, disabling eDP\n"); drm_encoder_cleanup(encoder); cdv_intel_dp_destroy(connector); - goto err_priv; + goto err_connector; } else { DRM_DEBUG_KMS("DPCD: Rev=%x LN_Rate=%x LN_CNT=%x LN_DOWNSP=%x\n", intel_dp->dpcd[0], intel_dp->dpcd[1], diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e5574e506a5ccf..6d9e81ea67f4b7 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -38,6 +38,7 @@ i915-y += i915_drv.o \ i915_config.o \ i915_irq.o \ i915_getparam.o \ + i915_mitigations.o \ i915_params.o \ i915_pci.o \ i915_scatterlist.o \ diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 520715b7d5b554..1515cf229ed125 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1585,10 +1585,6 @@ static void gen11_dsi_get_power_domains(struct intel_encoder *encoder, get_dsi_io_power_domains(i915, enc_to_intel_dsi(encoder)); - - if (crtc_state->dsc.compression_enable) - intel_display_power_get(i915, - intel_dsc_power_domain(crtc_state)); } static bool gen11_dsi_get_hw_state(struct intel_encoder 
*encoder, diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index cdcb7b1034ae4a..3f2bbd9370a86b 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3387,7 +3387,7 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_ddi_init_dp_buf_reg(encoder); if (!is_mst) - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + intel_dp_set_power(intel_dp, DP_SET_POWER_D0); intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); /* @@ -3469,8 +3469,8 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_ddi_init_dp_buf_reg(encoder); if (!is_mst) - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); - intel_dp_configure_protocol_converter(intel_dp); + intel_dp_set_power(intel_dp, DP_SET_POWER_D0); + intel_dp_configure_protocol_converter(intel_dp, crtc_state); intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); intel_dp_sink_set_fec_ready(intel_dp, crtc_state); @@ -3647,7 +3647,7 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, * Power down sink before disabling the port, otherwise we end * up getting interrupts from the sink on detecting link loss. */ - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + intel_dp_set_power(intel_dp, DP_SET_POWER_D3); if (INTEL_GEN(dev_priv) >= 12) { if (is_mst) { diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 3d4bf9b6a0a2ca..06d4ce31838a57 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1382,6 +1382,9 @@ struct intel_dp { bool ycbcr_444_to_420; } dfp; + /* To control wakeup latency, e.g. for irq-driven dp aux transfers. 
*/ + struct pm_qos_request pm_qos; + /* Display stream compression testing */ bool force_dsc_en; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 9bc59fd2f95f56..1937b3d6342ae7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1411,7 +1411,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, * lowest possible wakeup latency and so prevent the cpu from going into * deep sleep states. */ - cpu_latency_qos_update_request(&i915->pm_qos, 0); + cpu_latency_qos_update_request(&intel_dp->pm_qos, 0); intel_dp_check_edp(intel_dp); @@ -1544,7 +1544,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, ret = recv_bytes; out: - cpu_latency_qos_update_request(&i915->pm_qos, PM_QOS_DEFAULT_VALUE); + cpu_latency_qos_update_request(&intel_dp->pm_qos, PM_QOS_DEFAULT_VALUE); if (vdd) edp_panel_vdd_off(intel_dp, false); @@ -1776,6 +1776,9 @@ static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index) static void intel_dp_aux_fini(struct intel_dp *intel_dp) { + if (cpu_latency_qos_request_active(&intel_dp->pm_qos)) + cpu_latency_qos_remove_request(&intel_dp->pm_qos); + kfree(intel_dp->aux.name); } @@ -1818,6 +1821,7 @@ intel_dp_aux_init(struct intel_dp *intel_dp) aux_ch_name(dig_port->aux_ch), port_name(encoder->port)); intel_dp->aux.transfer = intel_dp_aux_transfer; + cpu_latency_qos_add_request(&intel_dp->pm_qos, PM_QOS_DEFAULT_VALUE); } bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) @@ -3492,22 +3496,22 @@ void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, enable ? 
"enable" : "disable"); } -/* If the sink supports it, try to set the power state appropriately */ -void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode) +/* If the device supports it, try to set the power state appropriately */ +void intel_dp_set_power(struct intel_dp *intel_dp, u8 mode) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + struct drm_i915_private *i915 = to_i915(encoder->base.dev); int ret, i; /* Should have a valid DPCD by this point */ if (intel_dp->dpcd[DP_DPCD_REV] < 0x11) return; - if (mode != DRM_MODE_DPMS_ON) { + if (mode != DP_SET_POWER_D0) { if (downstream_hpd_needs_d0(intel_dp)) return; - ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, - DP_SET_POWER_D3); + ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, mode); } else { struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); @@ -3516,8 +3520,7 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode) * time to wake up. */ for (i = 0; i < 3; i++) { - ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, - DP_SET_POWER_D0); + ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, mode); if (ret == 1) break; msleep(1); @@ -3528,8 +3531,9 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode) } if (ret != 1) - drm_dbg_kms(&i915->drm, "failed to %s sink power state\n", - mode == DRM_MODE_DPMS_ON ? "enable" : "disable"); + drm_dbg_kms(&i915->drm, "[ENCODER:%d:%s] Set power to %s failed\n", + encoder->base.base.id, encoder->base.name, + mode == DP_SET_POWER_D0 ? "D0" : "D3"); } static bool cpt_dp_port_selected(struct drm_i915_private *dev_priv, @@ -3703,7 +3707,7 @@ static void intel_disable_dp(struct intel_atomic_state *state, * ensure that we have vdd while we switch off the panel. 
*/ intel_edp_panel_vdd_on(intel_dp); intel_edp_backlight_off(old_conn_state); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + intel_dp_set_power(intel_dp, DP_SET_POWER_D3); intel_edp_panel_off(intel_dp); } @@ -3852,7 +3856,8 @@ static void intel_dp_enable_port(struct intel_dp *intel_dp, intel_de_posting_read(dev_priv, intel_dp->output_reg); } -void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp) +void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); u8 tmp; @@ -3871,8 +3876,8 @@ void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp) drm_dbg_kms(&i915->drm, "Failed to set protocol converter HDMI mode to %s\n", enableddisabled(intel_dp->has_hdmi_sink)); - tmp = intel_dp->dfp.ycbcr_444_to_420 ? - DP_CONVERSION_TO_YCBCR420_ENABLE : 0; + tmp = crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444 && + intel_dp->dfp.ycbcr_444_to_420 ? DP_CONVERSION_TO_YCBCR420_ENABLE : 0; if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_PROTOCOL_CONVERTER_CONTROL_1, tmp) != 1) @@ -3925,8 +3930,8 @@ static void intel_enable_dp(struct intel_atomic_state *state, lane_mask); } - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); - intel_dp_configure_protocol_converter(intel_dp); + intel_dp_set_power(intel_dp, DP_SET_POWER_D0); + intel_dp_configure_protocol_converter(intel_dp, pipe_config); intel_dp_start_link_train(intel_dp); intel_dp_stop_link_train(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 08a1c0aa8b94b7..2dd934182471e2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -50,8 +50,9 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int link_rate, u8 lane_count); int intel_dp_retrain_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx); -void intel_dp_sink_dpms(struct intel_dp 
*intel_dp, int mode); -void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp); +void intel_dp_set_power(struct intel_dp *intel_dp, u8 mode); +void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state); void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, bool enable); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 64d885539e94ad..5d745d9b99b2ad 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -488,7 +488,7 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, intel_dp->active_mst_links); if (first_mst_stream) - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + intel_dp_set_power(intel_dp, DP_SET_POWER_D0); drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true); diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index e08684e34078ab..91b37b76618d20 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2622,11 +2622,22 @@ static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state) return true; } +/* + * Display WA #22010492432: tgl + * Program half of the nominal DCO divider fraction value. 
+ */ +static bool +tgl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) +{ + return IS_TIGERLAKE(i915) && i915->dpll.ref_clks.nssc == 38400; +} + static int __cnl_ddi_wrpll_get_freq(struct drm_i915_private *dev_priv, const struct intel_shared_dpll *pll, int ref_clock) { const struct intel_dpll_hw_state *pll_state = &pll->state.hw_state; + u32 dco_fraction; u32 p0, p1, p2, dco_freq; p0 = pll_state->cfgcr1 & DPLL_CFGCR1_PDIV_MASK; @@ -2669,8 +2680,13 @@ static int __cnl_ddi_wrpll_get_freq(struct drm_i915_private *dev_priv, dco_freq = (pll_state->cfgcr0 & DPLL_CFGCR0_DCO_INTEGER_MASK) * ref_clock; - dco_freq += (((pll_state->cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> - DPLL_CFGCR0_DCO_FRACTION_SHIFT) * ref_clock) / 0x8000; + dco_fraction = (pll_state->cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> + DPLL_CFGCR0_DCO_FRACTION_SHIFT; + + if (tgl_combo_pll_div_frac_wa_needed(dev_priv)) + dco_fraction *= 2; + + dco_freq += (dco_fraction * ref_clock) / 0x8000; if (drm_WARN_ON(&dev_priv->drm, p0 == 0 || p1 == 0 || p2 == 0)) return 0; @@ -2948,16 +2964,6 @@ static const struct skl_wrpll_params tgl_tbt_pll_24MHz_values = { /* the following params are unused */ }; -/* - * Display WA #22010492432: tgl - * Divide the nominal .dco_fraction value by 2. 
- */ -static const struct skl_wrpll_params tgl_tbt_pll_38_4MHz_values = { - .dco_integer = 0x54, .dco_fraction = 0x1800, - /* the following params are unused */ - .pdiv = 0, .kdiv = 0, .qdiv_mode = 0, .qdiv_ratio = 0, -}; - static bool icl_calc_dp_combo_pll(struct intel_crtc_state *crtc_state, struct skl_wrpll_params *pll_params) { @@ -2991,14 +2997,12 @@ static bool icl_calc_tbt_pll(struct intel_crtc_state *crtc_state, MISSING_CASE(dev_priv->dpll.ref_clks.nssc); fallthrough; case 19200: + case 38400: *pll_params = tgl_tbt_pll_19_2MHz_values; break; case 24000: *pll_params = tgl_tbt_pll_24MHz_values; break; - case 38400: - *pll_params = tgl_tbt_pll_38_4MHz_values; - break; } } else { switch (dev_priv->dpll.ref_clks.nssc) { @@ -3065,9 +3069,14 @@ static void icl_calc_dpll_state(struct drm_i915_private *i915, const struct skl_wrpll_params *pll_params, struct intel_dpll_hw_state *pll_state) { + u32 dco_fraction = pll_params->dco_fraction; + memset(pll_state, 0, sizeof(*pll_state)); - pll_state->cfgcr0 = DPLL_CFGCR0_DCO_FRACTION(pll_params->dco_fraction) | + if (tgl_combo_pll_div_frac_wa_needed(i915)) + dco_fraction = DIV_ROUND_CLOSEST(dco_fraction, 2); + + pll_state->cfgcr0 = DPLL_CFGCR0_DCO_FRACTION(dco_fraction) | pll_params->dco_integer; pll_state->cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(pll_params->qdiv_ratio) | diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 5492076d1ae098..17a8c2e73a8206 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -2187,6 +2187,7 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, if (content_protection_type_changed) { mutex_lock(&hdcp->mutex); hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; + drm_connector_get(&connector->base); schedule_work(&hdcp->prop_work); mutex_unlock(&hdcp->mutex); } @@ -2198,6 +2199,14 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, desired_and_not_enabled = hdcp->value != 
DRM_MODE_CONTENT_PROTECTION_ENABLED; mutex_unlock(&hdcp->mutex); + /* + * If HDCP already ENABLED and CP property is DESIRED, schedule + * prop_work to update correct CP property to user space. + */ + if (!desired_and_not_enabled && !content_protection_type_changed) { + drm_connector_get(&connector->base); + schedule_work(&hdcp->prop_work); + } } if (desired_and_not_enabled || content_protection_type_changed) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 9f23bac0d7924d..d64fce1a17cbc5 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -1650,16 +1650,13 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus val = pch_get_backlight(connector); else val = lpt_get_backlight(connector); - val = intel_panel_compute_brightness(connector, val); - panel->backlight.level = clamp(val, panel->backlight.min, - panel->backlight.max); if (cpu_mode) { drm_dbg_kms(&dev_priv->drm, "CPU backlight register was enabled, switching to PCH override\n"); /* Write converted CPU PWM value to PCH override register */ - lpt_set_backlight(connector->base.state, panel->backlight.level); + lpt_set_backlight(connector->base.state, val); intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1 | BLM_PCH_OVERRIDE_ENABLE); @@ -1667,6 +1664,10 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus cpu_ctl2 & ~BLM_PWM_ENABLE); } + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); + return 0; } diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 5e5522923b1e46..690239d3f2e532 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -812,10 +812,20 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, intel_dsi_prepare(encoder, pipe_config); 
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_ON); - intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); - /* Deassert reset */ - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + /* + * Give the panel time to power-on and then deassert its reset. + * Depending on the VBT MIPI sequences version the deassert-seq + * may contain the necessary delay, intel_dsi_msleep() will skip + * the delay in that case. If there is no deassert-seq, then an + * unconditional msleep is used to give the panel time to power-on. + */ + if (dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET]) { + intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + } else { + msleep(intel_dsi->panel_on_delay); + } if (IS_GEMINILAKE(dev_priv)) { glk_cold_boot = glk_dsi_enable_io(encoder); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index b07dc1156a0e6e..bd3046e5a93480 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -382,7 +382,7 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, return true; if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && - (vma->node.start + vma->node.size - 1) >> 32) + (vma->node.start + vma->node.size + 4095) >> 32) return true; if (flags & __EXEC_OBJECT_NEEDS_MAP && @@ -1046,7 +1046,7 @@ static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cach GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); + i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); intel_gt_chipset_flush(cache->rq->engine->gt); @@ -1296,6 +1296,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_pool; } + memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); + batch = 
i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index d93d85cd30270b..e961ad6a312944 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -7,8 +7,6 @@ #include "i915_drv.h" #include "intel_gpu_commands.h" -#define MAX_URB_ENTRIES 64 -#define STATE_SIZE (4 * 1024) #define GT3_INLINE_DATA_DELAYS 0x1E00 #define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS)) @@ -34,38 +32,59 @@ struct batch_chunk { }; struct batch_vals { - u32 max_primitives; - u32 max_urb_entries; - u32 cmd_size; - u32 state_size; + u32 max_threads; u32 state_start; - u32 batch_size; + u32 surface_start; u32 surface_height; u32 surface_width; - u32 scratch_size; - u32 max_size; + u32 size; }; +static inline int num_primitives(const struct batch_vals *bv) +{ + /* + * We need to saturate the GPU with work in order to dispatch + * a shader on every HW thread, and clear the thread-local registers. + * In short, we have to dispatch work faster than the shaders can + * run in order to fill the EU and occupy each HW thread. 
+ */ + return bv->max_threads; +} + static void batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv) { if (IS_HASWELL(i915)) { - bv->max_primitives = 280; - bv->max_urb_entries = MAX_URB_ENTRIES; + switch (INTEL_INFO(i915)->gt) { + default: + case 1: + bv->max_threads = 70; + break; + case 2: + bv->max_threads = 140; + break; + case 3: + bv->max_threads = 280; + break; + } bv->surface_height = 16 * 16; bv->surface_width = 32 * 2 * 16; } else { - bv->max_primitives = 128; - bv->max_urb_entries = MAX_URB_ENTRIES / 2; + switch (INTEL_INFO(i915)->gt) { + default: + case 1: /* including vlv */ + bv->max_threads = 36; + break; + case 2: + bv->max_threads = 128; + break; + } bv->surface_height = 16 * 8; bv->surface_width = 32 * 16; } - bv->cmd_size = bv->max_primitives * 4096; - bv->state_size = STATE_SIZE; - bv->state_start = bv->cmd_size; - bv->batch_size = bv->cmd_size + bv->state_size; - bv->scratch_size = bv->surface_height * bv->surface_width; - bv->max_size = bv->batch_size + bv->scratch_size; + bv->state_start = round_up(SZ_1K + num_primitives(bv) * 64, SZ_4K); + bv->surface_start = bv->state_start + SZ_4K; + bv->size = bv->surface_start + bv->surface_height * bv->surface_width; } static void batch_init(struct batch_chunk *bc, @@ -155,7 +174,8 @@ static u32 gen7_fill_binding_table(struct batch_chunk *state, const struct batch_vals *bv) { - u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv); + u32 surface_start = + gen7_fill_surface_state(state, bv->surface_start, bv); u32 *cs = batch_alloc_items(state, 32, 8); u32 offset = batch_offset(state, cs); @@ -214,9 +234,9 @@ static void gen7_emit_state_base_address(struct batch_chunk *batch, u32 surface_state_base) { - u32 *cs = batch_alloc_items(batch, 0, 12); + u32 *cs = batch_alloc_items(batch, 0, 10); - *cs++ = STATE_BASE_ADDRESS | (12 - 2); + *cs++ = STATE_BASE_ADDRESS | (10 - 2); /* general */ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; /* surface */ @@ -233,8 +253,6 @@ 
gen7_emit_state_base_address(struct batch_chunk *batch, *cs++ = BASE_ADDRESS_MODIFY; *cs++ = 0; *cs++ = BASE_ADDRESS_MODIFY; - *cs++ = 0; - *cs++ = 0; batch_advance(batch, cs); } @@ -244,8 +262,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch, u32 urb_size, u32 curbe_size, u32 mode) { - u32 urb_entries = bv->max_urb_entries; - u32 threads = bv->max_primitives - 1; + u32 threads = bv->max_threads - 1; u32 *cs = batch_alloc_items(batch, 32, 8); *cs++ = MEDIA_VFE_STATE | (8 - 2); @@ -254,7 +271,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch, *cs++ = 0; /* number of threads & urb entries for GPGPU vs Media Mode */ - *cs++ = threads << 16 | urb_entries << 8 | mode << 2; + *cs++ = threads << 16 | 1 << 8 | mode << 2; *cs++ = 0; @@ -293,17 +310,12 @@ gen7_emit_media_object(struct batch_chunk *batch, { unsigned int x_offset = (media_object_index % 16) * 64; unsigned int y_offset = (media_object_index / 16) * 16; - unsigned int inline_data_size; - unsigned int media_batch_size; - unsigned int i; + unsigned int pkt = 6 + 3; u32 *cs; - inline_data_size = 112 * 8; - media_batch_size = inline_data_size + 6; - - cs = batch_alloc_items(batch, 8, media_batch_size); + cs = batch_alloc_items(batch, 8, pkt); - *cs++ = MEDIA_OBJECT | (media_batch_size - 2); + *cs++ = MEDIA_OBJECT | (pkt - 2); /* interface descriptor offset */ *cs++ = 0; @@ -317,25 +329,44 @@ gen7_emit_media_object(struct batch_chunk *batch, *cs++ = 0; /* inline */ - *cs++ = (y_offset << 16) | (x_offset); + *cs++ = y_offset << 16 | x_offset; *cs++ = 0; *cs++ = GT3_INLINE_DATA_DELAYS; - for (i = 3; i < inline_data_size; i++) - *cs++ = 0; batch_advance(batch, cs); } static void gen7_emit_pipeline_flush(struct batch_chunk *batch) { - u32 *cs = batch_alloc_items(batch, 0, 5); + u32 *cs = batch_alloc_items(batch, 0, 4); - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + 
PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL; *cs++ = 0; *cs++ = 0; + + batch_advance(batch, cs); +} + +static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch) +{ + u32 *cs = batch_alloc_items(batch, 0, 8); + + /* ivb: Stall before STATE_CACHE_INVALIDATE */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_CS_STALL; + *cs++ = 0; + *cs++ = 0; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE; *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); } @@ -344,34 +375,46 @@ static void emit_batch(struct i915_vma * const vma, const struct batch_vals *bv) { struct drm_i915_private *i915 = vma->vm->i915; - unsigned int desc_count = 64; - const u32 urb_size = 112; + const unsigned int desc_count = 1; + const unsigned int urb_size = 1; struct batch_chunk cmds, state; - u32 interface_descriptor; + u32 descriptors; unsigned int i; - batch_init(&cmds, vma, start, 0, bv->cmd_size); - batch_init(&state, vma, start, bv->state_start, bv->state_size); - - interface_descriptor = - gen7_fill_interface_descriptor(&state, bv, - IS_HASWELL(i915) ? - &cb_kernel_hsw : - &cb_kernel_ivb, - desc_count); + batch_init(&cmds, vma, start, 0, bv->state_start); + batch_init(&state, vma, start, bv->state_start, SZ_4K); + + descriptors = gen7_fill_interface_descriptor(&state, bv, + IS_HASWELL(i915) ? 
+ &cb_kernel_hsw : + &cb_kernel_ivb, + desc_count); + + /* Reset inherited context registers */ + gen7_emit_pipeline_invalidate(&cmds); + batch_add(&cmds, MI_LOAD_REGISTER_IMM(2)); + batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7)); + batch_add(&cmds, 0xffff0000); + batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1)); + batch_add(&cmds, 0xffff0000 | PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); gen7_emit_pipeline_flush(&cmds); + + /* Switch to the media pipeline and our base address */ + gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); batch_add(&cmds, MI_NOOP); - gen7_emit_state_base_address(&cmds, interface_descriptor); + gen7_emit_pipeline_invalidate(&cmds); + gen7_emit_pipeline_flush(&cmds); + gen7_emit_state_base_address(&cmds, descriptors); + gen7_emit_pipeline_invalidate(&cmds); + /* Set the clear-residual kernel state */ gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); + gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count); - gen7_emit_interface_descriptor_load(&cmds, - interface_descriptor, - desc_count); - - for (i = 0; i < bv->max_primitives; i++) + /* Execute the kernel on all HW threads */ + for (i = 0; i < num_primitives(bv); i++) gen7_emit_media_object(&cmds, i); batch_add(&cmds, MI_BATCH_BUFFER_END); @@ -385,15 +428,15 @@ int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, batch_get_defaults(engine->i915, &bv); if (!vma) - return bv.max_size; + return bv.size; - GEM_BUG_ON(vma->obj->base.size < bv.max_size); + GEM_BUG_ON(vma->obj->base.size < bv.size); batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); if (IS_ERR(batch)) return PTR_ERR(batch); - emit_batch(vma, memset(batch, 0, bv.max_size), &bv); + emit_batch(vma, memset(batch, 0, bv.size), &bv); i915_gem_object_flush_map(vma->obj); __i915_gem_object_release_map(vma->obj); diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index a24cc1ff08a0c4..0625cbb3b43120 
100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b, return true; } -static inline bool __request_completed(const struct i915_request *rq) -{ - return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); -} - __maybe_unused static bool check_signal_order(struct intel_context *ce, struct i915_request *rq) { @@ -257,7 +252,7 @@ static void signal_irq_work(struct irq_work *work) list_for_each_entry_rcu(rq, &ce->signals, signal_link) { bool release; - if (!__request_completed(rq)) + if (!__i915_request_is_complete(rq)) break; if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, @@ -379,7 +374,7 @@ static void insert_breadcrumb(struct i915_request *rq) * straight onto a signaled list, and queue the irq worker for * its signal completion. */ - if (__request_completed(rq)) { + if (__i915_request_is_complete(rq)) { if (__signal_request(rq) && llist_add(&rq->signal_node, &b->signaled_requests)) irq_work_queue(&b->irq_work); diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 81c05f551b9c88..060f826b1d52e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -526,16 +526,39 @@ static int init_ggtt(struct i915_ggtt *ggtt) mutex_init(&ggtt->error_mutex); if (ggtt->mappable_end) { - /* Reserve a mappable slot for our lockless error capture */ - ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, - &ggtt->error_capture, - PAGE_SIZE, 0, - I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); - if (ret) - return ret; + /* + * Reserve a mappable slot for our lockless error capture. + * + * We strongly prefer taking address 0x0 in order to protect + * other critical buffers against accidental overwrites, + * as writing to address 0 is a very common mistake. + * + * Since 0 may already be in use by the system (e.g. 
the BIOS + * framebuffer), we let the reservation fail quietly and hope + * 0 remains reserved always. + * + * If we fail to reserve 0, and then fail to find any space + * for an error-capture, remain silent. We can afford not + * to reserve an error_capture node as we have fallback + * paths, and we trust that 0 will remain reserved. However, + * the only likely reason for failure to insert is a driver + * bug, which we expect to cause other failures... + */ + ggtt->error_capture.size = I915_GTT_PAGE_SIZE; + ggtt->error_capture.color = I915_COLOR_UNEVICTABLE; + if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture)) + drm_mm_insert_node_in_range(&ggtt->vm.mm, + &ggtt->error_capture, + ggtt->error_capture.size, 0, + ggtt->error_capture.color, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); } + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_dbg(&ggtt->vm.i915->drm, + "Reserved GGTT:[%llx, %llx] for use by error capture\n", + ggtt->error_capture.start, + ggtt->error_capture.start + ggtt->error_capture.size); /* * The upper portion of the GuC address space has a sizeable hole @@ -548,9 +571,9 @@ static int init_ggtt(struct i915_ggtt *ggtt) /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { - drm_dbg_kms(&ggtt->vm.i915->drm, - "clearing unused GTT space: [%lx, %lx]\n", - hole_start, hole_end); + drm_dbg(&ggtt->vm.i915->drm, + "clearing unused GTT space: [%lx, %lx]\n", + hole_start, hole_end); ggtt->vm.clear_range(&ggtt->vm, hole_start, hole_end - hole_start); } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 724b2cb897d33f..ee9b33c3aff837 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3936,6 +3936,9 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); + + /* Called on error unwind, clear 
all flags to prevent further use */ + memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx)); } typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 16b48e72c36910..6aaca73eaee60b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -32,6 +32,7 @@ #include "gen6_ppgtt.h" #include "gen7_renderclear.h" #include "i915_drv.h" +#include "i915_mitigations.h" #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_gt.h" @@ -885,7 +886,8 @@ static int switch_context(struct i915_request *rq) GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); if (engine->wa_ctx.vma && ce != engine->kernel_context) { - if (engine->wa_ctx.vma->private != ce) { + if (engine->wa_ctx.vma->private != ce && + i915_mitigate_clear_residuals()) { ret = clear_residuals(rq); if (ret) return ret; @@ -1289,7 +1291,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); - if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { + if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) { err = gen7_ctx_switch_bb_init(engine); if (err) goto err_ring_unpin; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 7ea94d201fe6fb..8015964043eb7a 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu) struct intel_timeline_cacheline *cl = container_of(rcu, typeof(*cl), rcu); + /* Must wait until after all *rq->hwsp are complete before removing */ + i915_gem_object_unpin_map(cl->hwsp->vma->obj); + __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); + i915_active_fini(&cl->active); kfree(cl); } @@ -133,11 +137,6 @@ static void 
__rcu_cacheline_free(struct rcu_head *rcu) static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) { GEM_BUG_ON(!i915_active_is_idle(&cl->active)); - - i915_gem_object_unpin_map(cl->hwsp->vma->obj); - i915_vma_put(cl->hwsp->vma); - __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); - call_rcu(&cl->rcu, __rcu_cacheline_free); } @@ -179,7 +178,6 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) return ERR_CAST(vaddr); } - i915_vma_get(hwsp->vma); cl->hwsp = hwsp; cl->vaddr = page_pack_bits(vaddr, cacheline); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 10a865f3dc09ac..9ed19b8bca6009 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -631,24 +631,26 @@ static int flush_lazy_signals(struct i915_active *ref) int __i915_active_wait(struct i915_active *ref, int state) { - int err; - might_sleep(); - if (!i915_active_acquire_if_busy(ref)) - return 0; - /* Any fence added after the wait begins will not be auto-signaled */ - err = flush_lazy_signals(ref); - i915_active_release(ref); - if (err) - return err; + if (i915_active_acquire_if_busy(ref)) { + int err; - if (!i915_active_is_idle(ref) && - ___wait_var_event(ref, i915_active_is_idle(ref), - state, 0, 0, schedule())) - return -EINTR; + err = flush_lazy_signals(ref); + i915_active_release(ref); + if (err) + return err; + if (___wait_var_event(ref, i915_active_is_idle(ref), + state, 0, 0, schedule())) + return -EINTR; + } + + /* + * After the wait is complete, the caller may free the active. + * We have to flush any concurrent retirement before returning. 
+ */ flush_work(&ref->work); return 0; } diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index e88970256e8ef3..e7362ec22aded3 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1166,7 +1166,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, } } if (IS_ERR(src)) { - unsigned long x, n; + unsigned long x, n, remain; void *ptr; /* @@ -1177,14 +1177,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, * We don't care about copying too much here as we only * validate up to the end of the batch. */ + remain = length; if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - length = round_up(length, + remain = round_up(remain, boot_cpu_data.x86_clflush_size); ptr = dst; x = offset_in_page(offset); - for (n = offset >> PAGE_SHIFT; length; n++) { - int len = min(length, PAGE_SIZE - x); + for (n = offset >> PAGE_SHIFT; remain; n++) { + int len = min(remain, PAGE_SIZE - x); src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); if (needs_clflush) @@ -1193,13 +1194,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, kunmap_atomic(src); ptr += len; - length -= len; + remain -= len; x = 0; } } i915_gem_object_unpin_pages(src_obj); + memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); + /* dst_obj is returned with vmap pinned */ return dst; } @@ -1392,11 +1395,6 @@ static unsigned long *alloc_whitelist(u32 batch_length) #define LENGTH_BIAS 2 -static bool shadow_needs_clflush(struct drm_i915_gem_object *obj) -{ - return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); -} - /** * intel_engine_cmd_parser() - parse a batch buffer for privilege violations * @engine: the engine on which the batch is to execute @@ -1539,16 +1537,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, ret = 0; /* allow execution */ } } - - if (shadow_needs_clflush(shadow->obj)) - drm_clflush_virt_range(batch_end, 8); } 
- if (shadow_needs_clflush(shadow->obj)) { - void *ptr = page_mask_bits(shadow->obj->mm.mapping); - - drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); - } + i915_gem_object_flush_map(shadow->obj); if (!IS_ERR_OR_NULL(jump_whitelist)) kfree(jump_whitelist); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index acc32066cec357..382cf048eefe07 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -577,8 +577,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) pci_set_master(pdev); - cpu_latency_qos_add_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE); - intel_gt_init_workarounds(dev_priv); /* On the 945G/GM, the chipset reports the MSI capability on the @@ -623,7 +621,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); - cpu_latency_qos_remove_request(&dev_priv->pm_qos); err_mem_regions: intel_memory_regions_driver_release(dev_priv); err_ggtt: @@ -645,8 +642,6 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) if (pdev->msi_enabled) pci_disable_msi(pdev); - - cpu_latency_qos_remove_request(&dev_priv->pm_qos); } /** diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8426d59746693f..fa830e77bb6483 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -892,9 +892,6 @@ struct drm_i915_private { bool display_irqs_enabled; - /* To control wakeup latency, e.g. for irq-driven dp aux transfers. 
*/ - struct pm_qos_request pm_qos; - /* Sideband mailbox protection */ struct mutex sb_lock; struct pm_qos_request sb_qos; @@ -1350,7 +1347,7 @@ intel_subplatform(const struct intel_runtime_info *info, enum intel_platform p) { const unsigned int pi = __platform_mask_index(info, p); - return info->platform_mask[pi] & INTEL_SUBPLATFORM_BITS; + return info->platform_mask[pi] & ((1 << INTEL_SUBPLATFORM_BITS) - 1); } static __always_inline bool diff --git a/drivers/gpu/drm/i915/i915_mitigations.c b/drivers/gpu/drm/i915/i915_mitigations.c new file mode 100644 index 00000000000000..84f12598d14584 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mitigations.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include +#include +#include +#include + +#include "i915_drv.h" +#include "i915_mitigations.h" + +static unsigned long mitigations __read_mostly = ~0UL; + +enum { + CLEAR_RESIDUALS = 0, +}; + +static const char * const names[] = { + [CLEAR_RESIDUALS] = "residuals", +}; + +bool i915_mitigate_clear_residuals(void) +{ + return READ_ONCE(mitigations) & BIT(CLEAR_RESIDUALS); +} + +static int mitigations_set(const char *val, const struct kernel_param *kp) +{ + unsigned long new = ~0UL; + char *str, *sep, *tok; + bool first = true; + int err = 0; + + BUILD_BUG_ON(ARRAY_SIZE(names) >= BITS_PER_TYPE(mitigations)); + + str = kstrdup(val, GFP_KERNEL); + if (!str) + return -ENOMEM; + + for (sep = str; (tok = strsep(&sep, ","));) { + bool enable = true; + int i; + + /* Be tolerant of leading/trailing whitespace */ + tok = strim(tok); + + if (first) { + first = false; + + if (!strcmp(tok, "auto")) + continue; + + new = 0; + if (!strcmp(tok, "off")) + continue; + } + + if (*tok == '!') { + enable = !enable; + tok++; + } + + if (!strncmp(tok, "no", 2)) { + enable = !enable; + tok += 2; + } + + if (*tok == '\0') + continue; + + for (i = 0; i < ARRAY_SIZE(names); i++) { + if (!strcmp(tok, names[i])) { + if (enable) + new |= BIT(i); + else 
+ new &= ~BIT(i); + break; + } + } + if (i == ARRAY_SIZE(names)) { + pr_err("Bad \"%s.mitigations=%s\", '%s' is unknown\n", + DRIVER_NAME, val, tok); + err = -EINVAL; + break; + } + } + kfree(str); + if (err) + return err; + + WRITE_ONCE(mitigations, new); + return 0; +} + +static int mitigations_get(char *buffer, const struct kernel_param *kp) +{ + unsigned long local = READ_ONCE(mitigations); + int count, i; + bool enable; + + if (!local) + return scnprintf(buffer, PAGE_SIZE, "%s\n", "off"); + + if (local & BIT(BITS_PER_LONG - 1)) { + count = scnprintf(buffer, PAGE_SIZE, "%s,", "auto"); + enable = false; + } else { + enable = true; + count = 0; + } + + for (i = 0; i < ARRAY_SIZE(names); i++) { + if ((local & BIT(i)) != enable) + continue; + + count += scnprintf(buffer + count, PAGE_SIZE - count, + "%s%s,", enable ? "" : "!", names[i]); + } + + buffer[count - 1] = '\n'; + return count; +} + +static const struct kernel_param_ops ops = { + .set = mitigations_set, + .get = mitigations_get, +}; + +module_param_cb_unsafe(mitigations, &ops, NULL, 0600); +MODULE_PARM_DESC(mitigations, +"Selectively enable security mitigations for all Intel® GPUs in the system.\n" +"\n" +" auto -- enables all mitigations required for the platform [default]\n" +" off -- disables all mitigations\n" +"\n" +"Individual mitigations can be enabled by passing a comma-separated string,\n" +"e.g. mitigations=residuals to enable only clearing residuals or\n" +"mitigations=auto,noresiduals to disable only the clear residual mitigation.\n" +"Either '!' 
or 'no' may be used to switch from enabling the mitigation to\n" +"disabling it.\n" +"\n" +"Active mitigations for Ivybridge, Baytrail, Haswell:\n" +" residuals -- clear all thread-local registers between contexts" +); diff --git a/drivers/gpu/drm/i915/i915_mitigations.h b/drivers/gpu/drm/i915/i915_mitigations.h new file mode 100644 index 00000000000000..1359d8135287ac --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mitigations.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_MITIGATIONS_H__ +#define __I915_MITIGATIONS_H__ + +#include + +bool i915_mitigate_clear_residuals(void); + +#endif /* __I915_MITIGATIONS_H__ */ diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 69c0fa20eba17b..3c9ac6649ead35 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -184,13 +184,24 @@ static u64 get_rc6(struct intel_gt *gt) return val; } -static void park_rc6(struct drm_i915_private *i915) +static void init_rc6(struct i915_pmu *pmu) { - struct i915_pmu *pmu = &i915->pmu; + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + intel_wakeref_t wakeref; - if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + with_intel_runtime_pm(i915->gt.uncore->rpm, wakeref) { pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = + pmu->sample[__I915_SAMPLE_RC6].cur; + pmu->sleep_last = ktime_get(); + } +} +static void park_rc6(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); pmu->sleep_last = ktime_get(); } @@ -201,6 +212,7 @@ static u64 get_rc6(struct intel_gt *gt) return __get_rc6(gt); } +static void init_rc6(struct i915_pmu *pmu) { } static void park_rc6(struct drm_i915_private *i915) {} #endif @@ -613,10 +625,8 @@ static void i915_pmu_enable(struct perf_event *event) 
container_of(event->pmu, typeof(*i915), pmu.base); unsigned int bit = event_enabled_bit(event); struct i915_pmu *pmu = &i915->pmu; - intel_wakeref_t wakeref; unsigned long flags; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); spin_lock_irqsave(&pmu->lock, flags); /* @@ -627,13 +637,6 @@ static void i915_pmu_enable(struct perf_event *event) GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); GEM_BUG_ON(pmu->enable_count[bit] == ~0); - if (pmu->enable_count[bit] == 0 && - config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) { - pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0; - pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); - pmu->sleep_last = ktime_get(); - } - pmu->enable |= BIT_ULL(bit); pmu->enable_count[bit]++; @@ -674,8 +677,6 @@ static void i915_pmu_enable(struct perf_event *event) * an existing non-zero value. */ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); } static void i915_pmu_disable(struct perf_event *event) @@ -1101,6 +1102,7 @@ void i915_pmu_register(struct drm_i915_private *i915) hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; pmu->cpuhp.slot = CPUHP_INVALID; + init_rc6(pmu); if (!is_igp(i915)) { pmu->name = kasprintf(GFP_KERNEL, diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 620b6fab2c5cfb..92adfee30c7c02 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -434,7 +434,7 @@ static inline u32 hwsp_seqno(const struct i915_request *rq) static inline bool __i915_request_has_started(const struct i915_request *rq) { - return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1); } /** @@ -465,11 +465,19 @@ static inline bool __i915_request_has_started(const struct i915_request *rq) */ static inline bool i915_request_started(const struct i915_request *rq) 
{ + bool result; + if (i915_request_signaled(rq)) return true; - /* Remember: started but may have since been preempted! */ - return __i915_request_has_started(rq); + result = true; + rcu_read_lock(); /* the HWSP may be freed at runtime */ + if (likely(!i915_request_signaled(rq))) + /* Remember: started but may have since been preempted! */ + result = __i915_request_has_started(rq); + rcu_read_unlock(); + + return result; } /** @@ -482,10 +490,16 @@ static inline bool i915_request_started(const struct i915_request *rq) */ static inline bool i915_request_is_running(const struct i915_request *rq) { + bool result; + if (!i915_request_is_active(rq)) return false; - return __i915_request_has_started(rq); + rcu_read_lock(); + result = __i915_request_has_started(rq) && i915_request_is_active(rq); + rcu_read_unlock(); + + return result; } /** @@ -509,12 +523,25 @@ static inline bool i915_request_is_ready(const struct i915_request *rq) return !list_empty(&rq->sched.link); } +static inline bool __i915_request_is_complete(const struct i915_request *rq) +{ + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); +} + static inline bool i915_request_completed(const struct i915_request *rq) { + bool result; + if (i915_request_signaled(rq)) return true; - return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno); + result = true; + rcu_read_lock(); /* the HWSP may be freed at runtime */ + if (likely(!i915_request_signaled(rq))) + result = __i915_request_is_complete(rq); + rcu_read_unlock(); + + return result; } static inline void i915_request_mark_complete(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index c53a222e3dece3..713770fb2b92d4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1880,7 +1880,7 @@ static int igt_cs_tlb(void *arg) vma = i915_vma_instance(out, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto 
out_put_batch; + goto out_put_out; } err = i915_vma_pin(vma, 0, 0, diff --git a/drivers/gpu/drm/imx/dcss/dcss-plane.c b/drivers/gpu/drm/imx/dcss/dcss-plane.c index 961d671f171b48..f54087ac44d35b 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-plane.c +++ b/drivers/gpu/drm/imx/dcss/dcss-plane.c @@ -111,7 +111,8 @@ static bool dcss_plane_can_rotate(const struct drm_format_info *format, supported_rotation = DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180 | DRM_MODE_REFLECT_MASK; else if (!format->is_yuv && - modifier == DRM_FORMAT_MOD_VIVANTE_TILED) + (modifier == DRM_FORMAT_MOD_VIVANTE_TILED || + modifier == DRM_FORMAT_MOD_VIVANTE_SUPER_TILED)) supported_rotation = DRM_MODE_ROTATE_MASK | DRM_MODE_REFLECT_MASK; else if (format->is_yuv && linear_format && @@ -273,6 +274,7 @@ static void dcss_plane_atomic_update(struct drm_plane *plane, u32 src_w, src_h, dst_w, dst_h; struct drm_rect src, dst; bool enable = true; + bool is_rotation_90_or_270; if (!fb || !state->crtc || !state->visible) return; @@ -311,8 +313,13 @@ static void dcss_plane_atomic_update(struct drm_plane *plane, dcss_plane_atomic_set_base(dcss_plane); + is_rotation_90_or_270 = state->rotation & (DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_270); + dcss_scaler_setup(dcss->scaler, dcss_plane->ch_num, - state->fb->format, src_w, src_h, + state->fb->format, + is_rotation_90_or_270 ? src_h : src_w, + is_rotation_90_or_270 ? 
src_w : src_h, dst_w, dst_h, drm_mode_vrefresh(&crtc_state->mode)); diff --git a/drivers/gpu/drm/mcde/mcde_drv.c b/drivers/gpu/drm/mcde/mcde_drv.c index 92f8bd907193f5..210f5e16300811 100644 --- a/drivers/gpu/drm/mcde/mcde_drv.c +++ b/drivers/gpu/drm/mcde/mcde_drv.c @@ -331,8 +331,8 @@ static int mcde_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (!irq) { - ret = -EINVAL; + if (irq < 0) { + ret = irq; goto clk_disable; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 8eba44be3a8aec..3064eac1a75079 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -359,7 +359,7 @@ static const struct mtk_ddp_comp_funcs ddp_ufoe = { static const char * const mtk_ddp_comp_stem[MTK_DDP_COMP_TYPE_MAX] = { [MTK_DISP_OVL] = "ovl", - [MTK_DISP_OVL_2L] = "ovl_2l", + [MTK_DISP_OVL_2L] = "ovl-2l", [MTK_DISP_RDMA] = "rdma", [MTK_DISP_WDMA] = "wdma", [MTK_DISP_COLOR] = "color", diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 8b9c8dd788c416..3d1de9cbb1c8d3 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -389,15 +389,17 @@ static void meson_drv_unbind(struct device *dev) meson_canvas_free(priv->canvas, priv->canvas_id_vd1_2); } + drm_dev_unregister(drm); + drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); + component_unbind_all(dev, drm); + drm_irq_uninstall(drm); + drm_dev_put(drm); + if (priv->afbcd.ops) { priv->afbcd.ops->reset(priv); meson_rdma_free(priv); } - - drm_dev_unregister(drm); - drm_irq_uninstall(drm); - drm_kms_helper_poll_fini(drm); - drm_dev_put(drm); } static const struct component_master_ops meson_drv_master_ops = { diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c index 29a8ff41595d24..aad75a22dc3382 100644 --- a/drivers/gpu/drm/meson/meson_dw_hdmi.c +++ 
b/drivers/gpu/drm/meson/meson_dw_hdmi.c @@ -145,8 +145,6 @@ struct meson_dw_hdmi { struct reset_control *hdmitx_apb; struct reset_control *hdmitx_ctrl; struct reset_control *hdmitx_phy; - struct clk *hdmi_pclk; - struct clk *venci_clk; struct regulator *hdmi_supply; u32 irq_stat; struct dw_hdmi *hdmi; @@ -941,6 +939,34 @@ static void meson_dw_hdmi_init(struct meson_dw_hdmi *meson_dw_hdmi) } +static void meson_disable_regulator(void *data) +{ + regulator_disable(data); +} + +static void meson_disable_clk(void *data) +{ + clk_disable_unprepare(data); +} + +static int meson_enable_clk(struct device *dev, char *name) +{ + struct clk *clk; + int ret; + + clk = devm_clk_get(dev, name); + if (IS_ERR(clk)) { + dev_err(dev, "Unable to get %s pclk\n", name); + return PTR_ERR(clk); + } + + ret = clk_prepare_enable(clk); + if (!ret) + ret = devm_add_action_or_reset(dev, meson_disable_clk, clk); + + return ret; +} + static int meson_dw_hdmi_bind(struct device *dev, struct device *master, void *data) { @@ -989,6 +1015,10 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master, ret = regulator_enable(meson_dw_hdmi->hdmi_supply); if (ret) return ret; + ret = devm_add_action_or_reset(dev, meson_disable_regulator, + meson_dw_hdmi->hdmi_supply); + if (ret) + return ret; } meson_dw_hdmi->hdmitx_apb = devm_reset_control_get_exclusive(dev, @@ -1017,19 +1047,17 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master, if (IS_ERR(meson_dw_hdmi->hdmitx)) return PTR_ERR(meson_dw_hdmi->hdmitx); - meson_dw_hdmi->hdmi_pclk = devm_clk_get(dev, "isfr"); - if (IS_ERR(meson_dw_hdmi->hdmi_pclk)) { - dev_err(dev, "Unable to get HDMI pclk\n"); - return PTR_ERR(meson_dw_hdmi->hdmi_pclk); - } - clk_prepare_enable(meson_dw_hdmi->hdmi_pclk); + ret = meson_enable_clk(dev, "isfr"); + if (ret) + return ret; - meson_dw_hdmi->venci_clk = devm_clk_get(dev, "venci"); - if (IS_ERR(meson_dw_hdmi->venci_clk)) { - dev_err(dev, "Unable to get venci clk\n"); - return 
PTR_ERR(meson_dw_hdmi->venci_clk); - } - clk_prepare_enable(meson_dw_hdmi->venci_clk); + ret = meson_enable_clk(dev, "iahb"); + if (ret) + return ret; + + ret = meson_enable_clk(dev, "venci"); + if (ret) + return ret; dw_plat_data->regm = devm_regmap_init(dev, NULL, meson_dw_hdmi, &meson_dw_hdmi_regmap_config); @@ -1062,10 +1090,10 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master, encoder->possible_crtcs = BIT(0); - DRM_DEBUG_DRIVER("encoder initialized\n"); - meson_dw_hdmi_init(meson_dw_hdmi); + DRM_DEBUG_DRIVER("encoder initialized\n"); + /* Bridge / Connector */ dw_plat_data->priv_data = meson_dw_hdmi; diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index e5816b4984942d..dabb4a1ccdcf70 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -4,8 +4,8 @@ config DRM_MSM tristate "MSM DRM" depends on DRM depends on ARCH_QCOM || SOC_IMX5 || (ARM && COMPILE_TEST) + depends on IOMMU_SUPPORT depends on OF && COMMON_CLK - depends on MMU depends on QCOM_OCMEM || QCOM_OCMEM=n select IOMMU_IO_PGTABLE select QCOM_MDT_LOADER if ARCH_QCOM diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index d6804a8023555a..69ed2c6094665b 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -755,12 +755,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); - /* Disable preemption if WHERE_AM_I isn't available */ - if (!a5xx_gpu->has_whereami && gpu->nr_rings > 1) { - a5xx_preempt_fini(gpu); - gpu->nr_rings = 1; - } else { - /* Create a privileged buffer for the RPTR shadow */ + /* Create a privileged buffer for the RPTR shadow */ + if (a5xx_gpu->has_whereami) { if (!a5xx_gpu->shadow_bo) { a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, sizeof(u32) * gpu->nr_rings, @@ -774,6 +770,10 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write64(gpu, 
REG_A5XX_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0])); + } else if (gpu->nr_rings > 1) { + /* Disable preemption if WHERE_AM_I isn't available */ + a5xx_preempt_fini(gpu); + gpu->nr_rings = 1; } a5xx_preempt_hw_init(gpu); @@ -1207,7 +1207,9 @@ static int a5xx_pm_resume(struct msm_gpu *gpu) static int a5xx_pm_suspend(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); u32 mask = 0xf; + int i, ret; /* A510 has 3 XIN ports in VBIF */ if (adreno_is_a510(adreno_gpu)) @@ -1227,7 +1229,15 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000); gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000); - return msm_gpu_pm_suspend(gpu); + ret = msm_gpu_pm_suspend(gpu); + if (ret) + return ret; + + if (a5xx_gpu->has_whereami) + for (i = 0; i < gpu->nr_rings; i++) + a5xx_gpu->shadow[i] = 0; + + return 0; } static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 948f3656c20cad..420ca4a0eb5f7e 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1045,12 +1045,21 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int i, ret; trace_msm_gpu_suspend(0); devfreq_suspend_device(gpu->devfreq.devfreq); - return a6xx_gmu_stop(a6xx_gpu); + ret = a6xx_gmu_stop(a6xx_gpu); + if (ret) + return ret; + + if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) + for (i = 0; i < gpu->nr_rings; i++) + a6xx_gpu->shadow[i] = 0; + + return 0; } static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c index 393858ef8a8325..37c8270681c23a 100644 --- 
a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c @@ -219,9 +219,6 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms, int i, ret = 0; u64 avg_bw; - if (!kms->num_paths) - return -EINVAL; - drm_for_each_crtc(tmp_crtc, crtc->dev) { if (tmp_crtc->enabled && curr_client_type == @@ -239,6 +236,9 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms, } } + if (!kms->num_paths) + return 0; + avg_bw = perf.bw_ctl; do_div(avg_bw, (kms->num_paths * 1000)); /*Bps_to_icc*/ diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index b15b4ce4ba35a7..4963bfe6a47263 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -572,6 +572,19 @@ void dp_catalog_ctrl_hpd_config(struct dp_catalog *dp_catalog) dp_write_aux(catalog, REG_DP_DP_HPD_CTRL, DP_DP_HPD_CTRL_HPD_EN); } +u32 dp_catalog_hpd_get_state_status(struct dp_catalog *dp_catalog) +{ + struct dp_catalog_private *catalog = container_of(dp_catalog, + struct dp_catalog_private, dp_catalog); + u32 status; + + status = dp_read_aux(catalog, REG_DP_DP_HPD_INT_STATUS); + status >>= DP_DP_HPD_STATE_STATUS_BITS_SHIFT; + status &= DP_DP_HPD_STATE_STATUS_BITS_MASK; + + return status; +} + u32 dp_catalog_hpd_get_intr_status(struct dp_catalog *dp_catalog) { struct dp_catalog_private *catalog = container_of(dp_catalog, diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.h b/drivers/gpu/drm/msm/dp/dp_catalog.h index 4b7666f1fe6fe7..6d257dbebf294e 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.h +++ b/drivers/gpu/drm/msm/dp/dp_catalog.h @@ -97,6 +97,7 @@ void dp_catalog_ctrl_enable_irq(struct dp_catalog *dp_catalog, bool enable); void dp_catalog_hpd_config_intr(struct dp_catalog *dp_catalog, u32 intr_mask, bool en); void dp_catalog_ctrl_hpd_config(struct dp_catalog *dp_catalog); +u32 dp_catalog_hpd_get_state_status(struct dp_catalog *dp_catalog); u32 dp_catalog_hpd_get_intr_status(struct dp_catalog 
*dp_catalog); void dp_catalog_ctrl_phy_reset(struct dp_catalog *dp_catalog); int dp_catalog_ctrl_update_vx_px(struct dp_catalog *dp_catalog, u8 v_level, diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 2e3e1917351f00..c83a1650437da5 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1061,23 +1061,15 @@ static bool dp_ctrl_train_pattern_set(struct dp_ctrl_private *ctrl, static int dp_ctrl_read_link_status(struct dp_ctrl_private *ctrl, u8 *link_status) { - int len = 0; - u32 const offset = DP_LANE_ALIGN_STATUS_UPDATED - DP_LANE0_1_STATUS; - u32 link_status_read_max_retries = 100; - - while (--link_status_read_max_retries) { - len = drm_dp_dpcd_read_link_status(ctrl->aux, - link_status); - if (len != DP_LINK_STATUS_SIZE) { - DRM_ERROR("DP link status read failed, err: %d\n", len); - return len; - } + int ret = 0, len; - if (!(link_status[offset] & DP_LINK_STATUS_UPDATED)) - return 0; + len = drm_dp_dpcd_read_link_status(ctrl->aux, link_status); + if (len != DP_LINK_STATUS_SIZE) { + DRM_ERROR("DP link status read failed, err: %d\n", len); + ret = -EINVAL; } - return -ETIMEDOUT; + return ret; } static int dp_ctrl_link_train_1(struct dp_ctrl_private *ctrl, @@ -1400,6 +1392,8 @@ int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip) void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; + struct dp_io *dp_io; + struct phy *phy; if (!dp_ctrl) { DRM_ERROR("Invalid input data\n"); @@ -1407,8 +1401,11 @@ void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl) } ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); + dp_io = &ctrl->parser->io; + phy = dp_io->phy; dp_catalog_ctrl_enable_irq(ctrl->catalog, false); + phy_exit(phy); DRM_DEBUG_DP("Host deinitialized successfully\n"); } @@ -1643,9 +1640,6 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) if (rc) return rc; - ctrl->link->phy_params.p_level = 0; - ctrl->link->phy_params.v_level = 0; - while 
(--link_train_max_retries && !atomic_read(&ctrl->dp_ctrl.aborted)) { rc = dp_ctrl_reinitialize_mainlink(ctrl); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index e175aa3fd3a932..fe0279542a1c2b 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -108,14 +108,12 @@ struct dp_display_private { /* event related only access by event thread */ struct mutex event_mutex; wait_queue_head_t event_q; - atomic_t hpd_state; + u32 hpd_state; u32 event_pndx; u32 event_gndx; struct dp_event event_list[DP_EVENT_Q_MAX]; spinlock_t event_lock; - struct completion resume_comp; - struct dp_audio *audio; }; @@ -335,6 +333,7 @@ static int dp_display_process_hpd_high(struct dp_display_private *dp) dp->dp_display.max_pclk_khz = DP_MAX_PIXEL_CLK_KHZ; dp->dp_display.max_dp_lanes = dp->parser->max_dp_lanes; + dp_link_reset_phy_params_vx_px(dp->link); rc = dp_ctrl_on_link(dp->ctrl); if (rc) { DRM_ERROR("failed to complete DP link training\n"); @@ -366,6 +365,20 @@ static void dp_display_host_init(struct dp_display_private *dp) dp->core_initialized = true; } +static void dp_display_host_deinit(struct dp_display_private *dp) +{ + if (!dp->core_initialized) { + DRM_DEBUG_DP("DP core not initialized\n"); + return; + } + + dp_ctrl_host_deinit(dp->ctrl); + dp_aux_deinit(dp->aux); + dp_power_deinit(dp->power); + + dp->core_initialized = false; +} + static int dp_display_usbpd_configure_cb(struct device *dev) { int rc = 0; @@ -490,7 +503,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) mutex_lock(&dp->event_mutex); - state = atomic_read(&dp->hpd_state); + state = dp->hpd_state; if (state == ST_SUSPEND_PENDING) { mutex_unlock(&dp->event_mutex); return 0; @@ -508,17 +521,14 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) return 0; } - if (state == ST_SUSPENDED) - tout = DP_TIMEOUT_NONE; - - atomic_set(&dp->hpd_state, ST_CONNECT_PENDING); + dp->hpd_state = 
ST_CONNECT_PENDING; hpd->hpd_high = 1; ret = dp_display_usbpd_configure_cb(&dp->pdev->dev); if (ret) { /* failed */ hpd->hpd_high = 0; - atomic_set(&dp->hpd_state, ST_DISCONNECTED); + dp->hpd_state = ST_DISCONNECTED; } /* start sanity checking */ @@ -539,10 +549,10 @@ static int dp_connect_pending_timeout(struct dp_display_private *dp, u32 data) mutex_lock(&dp->event_mutex); - state = atomic_read(&dp->hpd_state); + state = dp->hpd_state; if (state == ST_CONNECT_PENDING) { dp_display_enable(dp, 0); - atomic_set(&dp->hpd_state, ST_CONNECTED); + dp->hpd_state = ST_CONNECTED; } mutex_unlock(&dp->event_mutex); @@ -553,7 +563,14 @@ static int dp_connect_pending_timeout(struct dp_display_private *dp, u32 data) static void dp_display_handle_plugged_change(struct msm_dp *dp_display, bool plugged) { - if (dp_display->plugged_cb && dp_display->codec_dev) + struct dp_display_private *dp; + + dp = container_of(dp_display, + struct dp_display_private, dp_display); + + /* notify audio subsystem only if sink supports audio */ + if (dp_display->plugged_cb && dp_display->codec_dev && + dp->audio_supported) dp_display->plugged_cb(dp_display->codec_dev, plugged); } @@ -567,7 +584,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) mutex_lock(&dp->event_mutex); - state = atomic_read(&dp->hpd_state); + state = dp->hpd_state; if (state == ST_SUSPEND_PENDING) { mutex_unlock(&dp->event_mutex); return 0; @@ -585,7 +602,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) return 0; } - atomic_set(&dp->hpd_state, ST_DISCONNECT_PENDING); + dp->hpd_state = ST_DISCONNECT_PENDING; /* disable HPD plug interrupt until disconnect is done */ dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK @@ -620,10 +637,10 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data mutex_lock(&dp->event_mutex); - state = atomic_read(&dp->hpd_state); + state = dp->hpd_state; if (state == ST_DISCONNECT_PENDING) { 
dp_display_disable(dp, 0); - atomic_set(&dp->hpd_state, ST_DISCONNECTED); + dp->hpd_state = ST_DISCONNECTED; } mutex_unlock(&dp->event_mutex); @@ -638,7 +655,7 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) mutex_lock(&dp->event_mutex); /* irq_hpd can happen at either connected or disconnected state */ - state = atomic_read(&dp->hpd_state); + state = dp->hpd_state; if (state == ST_SUSPEND_PENDING) { mutex_unlock(&dp->event_mutex); return 0; @@ -789,17 +806,10 @@ static int dp_display_enable(struct dp_display_private *dp, u32 data) dp_display = g_dp_display; - if (dp_display->power_on) { - DRM_DEBUG_DP("Link already setup, return\n"); - return 0; - } - rc = dp_ctrl_on_stream(dp->ctrl); if (!rc) dp_display->power_on = true; - /* complete resume_comp regardless it is armed or not */ - complete(&dp->resume_comp); return rc; } @@ -828,9 +838,6 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) dp_display = g_dp_display; - if (!dp_display->power_on) - return -EINVAL; - /* wait only if audio was enabled */ if (dp_display->audio_enabled) { if (!wait_for_completion_timeout(&dp->audio_comp, @@ -1151,9 +1158,6 @@ static int dp_display_probe(struct platform_device *pdev) } mutex_init(&dp->event_mutex); - - init_completion(&dp->resume_comp); - g_dp_display = &dp->dp_display; /* Store DP audio handle inside DP display */ @@ -1189,20 +1193,54 @@ static int dp_display_remove(struct platform_device *pdev) static int dp_pm_resume(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); + struct msm_dp *dp_display = platform_get_drvdata(pdev); + struct dp_display_private *dp; + u32 status; + + dp = container_of(dp_display, struct dp_display_private, dp_display); + + mutex_lock(&dp->event_mutex); + + /* start from disconnected state */ + dp->hpd_state = ST_DISCONNECTED; + + /* turn on dp ctrl/phy */ + dp_display_host_init(dp); + + dp_catalog_ctrl_hpd_config(dp->catalog); + + status = 
dp_catalog_hpd_get_state_status(dp->catalog); + + if (status) { + dp->dp_display.is_connected = true; + } else { + dp->dp_display.is_connected = false; + /* make sure next resume host_init be called */ + dp->core_initialized = false; + } + + mutex_unlock(&dp->event_mutex); + return 0; } static int dp_pm_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); - struct dp_display_private *dp = platform_get_drvdata(pdev); + struct msm_dp *dp_display = platform_get_drvdata(pdev); + struct dp_display_private *dp; - if (!dp) { - DRM_ERROR("DP driver bind failed. Invalid driver data\n"); - return -EINVAL; - } + dp = container_of(dp_display, struct dp_display_private, dp_display); + + mutex_lock(&dp->event_mutex); - atomic_set(&dp->hpd_state, ST_SUSPENDED); + if (dp->core_initialized == true) + dp_display_host_deinit(dp); + + dp->hpd_state = ST_SUSPENDED; + + mutex_unlock(&dp->event_mutex); return 0; } @@ -1317,19 +1355,6 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, return 0; } -static int dp_display_wait4resume_done(struct dp_display_private *dp) -{ - int ret = 0; - - reinit_completion(&dp->resume_comp); - if (!wait_for_completion_timeout(&dp->resume_comp, - WAIT_FOR_RESUME_TIMEOUT_JIFFIES)) { - DRM_ERROR("wait4resume_done timedout\n"); - ret = -ETIMEDOUT; - } - return ret; -} - int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) { int rc = 0; @@ -1344,6 +1369,8 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) mutex_lock(&dp_display->event_mutex); + dp_del_event(dp_display, EV_CONNECT_PENDING_TIMEOUT); + rc = dp_display_set_mode(dp, &dp_display->dp_mode); if (rc) { DRM_ERROR("Failed to perform a mode set, rc=%d\n", rc); @@ -1358,15 +1385,10 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) return rc; } - state = atomic_read(&dp_display->hpd_state); - if (state == ST_SUSPENDED) { - /* start link training */ - dp_add_event(dp_display, 
EV_HPD_PLUG_INT, 0, 0); - mutex_unlock(&dp_display->event_mutex); + state = dp_display->hpd_state; - /* wait until dp interface is up */ - goto resume_done; - } + if (state == ST_SUSPEND_PENDING) + dp_display_host_init(dp_display); dp_display_enable(dp_display, 0); @@ -1377,21 +1399,15 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) dp_display_unprepare(dp); } - dp_del_event(dp_display, EV_CONNECT_PENDING_TIMEOUT); - if (state == ST_SUSPEND_PENDING) dp_add_event(dp_display, EV_IRQ_HPD_INT, 0, 0); /* completed connection */ - atomic_set(&dp_display->hpd_state, ST_CONNECTED); + dp_display->hpd_state = ST_CONNECTED; mutex_unlock(&dp_display->event_mutex); return rc; - -resume_done: - dp_display_wait4resume_done(dp_display); - return rc; } int msm_dp_display_pre_disable(struct msm_dp *dp, struct drm_encoder *encoder) @@ -1415,20 +1431,20 @@ int msm_dp_display_disable(struct msm_dp *dp, struct drm_encoder *encoder) mutex_lock(&dp_display->event_mutex); + dp_del_event(dp_display, EV_DISCONNECT_PENDING_TIMEOUT); + dp_display_disable(dp_display, 0); rc = dp_display_unprepare(dp); if (rc) DRM_ERROR("DP display unprepare failed, rc=%d\n", rc); - dp_del_event(dp_display, EV_DISCONNECT_PENDING_TIMEOUT); - - state = atomic_read(&dp_display->hpd_state); + state = dp_display->hpd_state; if (state == ST_DISCONNECT_PENDING) { /* completed disconnection */ - atomic_set(&dp_display->hpd_state, ST_DISCONNECTED); + dp_display->hpd_state = ST_DISCONNECTED; } else { - atomic_set(&dp_display->hpd_state, ST_SUSPEND_PENDING); + dp_display->hpd_state = ST_SUSPEND_PENDING; } mutex_unlock(&dp_display->event_mutex); diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index c811da515fb3ba..be986da78c4a5f 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -773,7 +773,8 @@ static int dp_link_process_link_training_request(struct dp_link_private *link) link->request.test_lane_count); 
link->dp_link.link_params.num_lanes = link->request.test_lane_count; - link->dp_link.link_params.rate = link->request.test_link_rate; + link->dp_link.link_params.rate = + drm_dp_bw_code_to_link_rate(link->request.test_link_rate); return 0; } @@ -869,6 +870,9 @@ static int dp_link_parse_vx_px(struct dp_link_private *link) drm_dp_get_adjust_request_voltage(link->link_status, 0); link->dp_link.phy_params.p_level = drm_dp_get_adjust_request_pre_emphasis(link->link_status, 0); + + link->dp_link.phy_params.p_level >>= DP_TRAIN_PRE_EMPHASIS_SHIFT; + DRM_DEBUG_DP("Requested: v_level = 0x%x, p_level = 0x%x\n", link->dp_link.phy_params.v_level, link->dp_link.phy_params.p_level); @@ -911,7 +915,8 @@ static int dp_link_process_phy_test_pattern_request( link->request.test_lane_count); link->dp_link.link_params.num_lanes = link->request.test_lane_count; - link->dp_link.link_params.rate = link->request.test_link_rate; + link->dp_link.link_params.rate = + drm_dp_bw_code_to_link_rate(link->request.test_link_rate); ret = dp_link_parse_vx_px(link); @@ -939,22 +944,20 @@ static u8 get_link_status(const u8 link_status[DP_LINK_STATUS_SIZE], int r) */ static int dp_link_process_link_status_update(struct dp_link_private *link) { - if (!(get_link_status(link->link_status, - DP_LANE_ALIGN_STATUS_UPDATED) & - DP_LINK_STATUS_UPDATED) || - (drm_dp_clock_recovery_ok(link->link_status, - link->dp_link.link_params.num_lanes) && - drm_dp_channel_eq_ok(link->link_status, - link->dp_link.link_params.num_lanes))) - return -EINVAL; + bool channel_eq_done = drm_dp_channel_eq_ok(link->link_status, + link->dp_link.link_params.num_lanes); - DRM_DEBUG_DP("channel_eq_done = %d, clock_recovery_done = %d\n", - drm_dp_clock_recovery_ok(link->link_status, - link->dp_link.link_params.num_lanes), - drm_dp_clock_recovery_ok(link->link_status, - link->dp_link.link_params.num_lanes)); + bool clock_recovery_done = drm_dp_clock_recovery_ok(link->link_status, + link->dp_link.link_params.num_lanes); - return 0; + 
DRM_DEBUG_DP("channel_eq_done = %d, clock_recovery_done = %d\n", + channel_eq_done, clock_recovery_done); + + if (channel_eq_done && clock_recovery_done) + return -EINVAL; + + + return 0; } /** @@ -1156,6 +1159,12 @@ int dp_link_adjust_levels(struct dp_link *dp_link, u8 *link_status) return 0; } +void dp_link_reset_phy_params_vx_px(struct dp_link *dp_link) +{ + dp_link->phy_params.v_level = 0; + dp_link->phy_params.p_level = 0; +} + u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) { u32 tbd; diff --git a/drivers/gpu/drm/msm/dp/dp_link.h b/drivers/gpu/drm/msm/dp/dp_link.h index 49811b6221e53f..9dd4dd92653046 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.h +++ b/drivers/gpu/drm/msm/dp/dp_link.h @@ -135,6 +135,7 @@ static inline u32 dp_link_bit_depth_to_bpc(u32 tbd) } } +void dp_link_reset_phy_params_vx_px(struct dp_link *dp_link); u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp); int dp_link_process_request(struct dp_link *dp_link); int dp_link_get_colorimetry_config(struct dp_link *dp_link); diff --git a/drivers/gpu/drm/msm/dp/dp_reg.h b/drivers/gpu/drm/msm/dp/dp_reg.h index 43042ff90a199c..268602803d9a32 100644 --- a/drivers/gpu/drm/msm/dp/dp_reg.h +++ b/drivers/gpu/drm/msm/dp/dp_reg.h @@ -32,6 +32,8 @@ #define DP_DP_IRQ_HPD_INT_ACK (0x00000002) #define DP_DP_HPD_REPLUG_INT_ACK (0x00000004) #define DP_DP_HPD_UNPLUG_INT_ACK (0x00000008) +#define DP_DP_HPD_STATE_STATUS_BITS_MASK (0x0000000F) +#define DP_DP_HPD_STATE_STATUS_BITS_SHIFT (0x1C) #define REG_DP_DP_HPD_INT_MASK (0x0000000C) #define DP_DP_HPD_PLUG_INT_MASK (0x00000001) diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c index 6ac04fc303f569..e4e9bf04b73687 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c @@ -559,6 +559,7 @@ static int dsi_pll_10nm_restore_state(struct msm_dsi_pll *pll) struct pll_10nm_cached_state *cached = &pll_10nm->cached_state; void __iomem *phy_base = 
pll_10nm->phy_cmn_mmio; u32 val; + int ret; val = pll_read(pll_10nm->mmio + REG_DSI_10nm_PHY_PLL_PLL_OUTDIV_RATE); val &= ~0x3; @@ -573,6 +574,13 @@ static int dsi_pll_10nm_restore_state(struct msm_dsi_pll *pll) val |= cached->pll_mux; pll_write(phy_base + REG_DSI_10nm_PHY_CMN_CLK_CFG1, val); + ret = dsi_pll_10nm_vco_set_rate(&pll->clk_hw, pll_10nm->vco_current_rate, pll_10nm->vco_ref_clk_rate); + if (ret) { + DRM_DEV_ERROR(&pll_10nm->pdev->dev, + "restore vco rate failed. ret=%d\n", ret); + return ret; + } + DBG("DSI PLL%d", pll_10nm->id); return 0; diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c index de0dfb8151258d..93bf142e4a4e6a 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c @@ -585,6 +585,7 @@ static int dsi_pll_7nm_restore_state(struct msm_dsi_pll *pll) struct pll_7nm_cached_state *cached = &pll_7nm->cached_state; void __iomem *phy_base = pll_7nm->phy_cmn_mmio; u32 val; + int ret; val = pll_read(pll_7nm->mmio + REG_DSI_7nm_PHY_PLL_PLL_OUTDIV_RATE); val &= ~0x3; @@ -599,6 +600,13 @@ static int dsi_pll_7nm_restore_state(struct msm_dsi_pll *pll) val |= cached->pll_mux; pll_write(phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1, val); + ret = dsi_pll_7nm_vco_set_rate(&pll->clk_hw, pll_7nm->vco_current_rate, pll_7nm->vco_ref_clk_rate); + if (ret) { + DRM_DEV_ERROR(&pll_7nm->pdev->dev, + "restore vco rate failed. 
ret=%d\n", ret); + return ret; + } + DBG("DSI PLL%d", pll_7nm->id); return 0; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 49685571dc0eeb..d556c353e5aead 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -444,14 +444,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) drm_mode_config_init(ddev); - /* Bind all our sub-components: */ - ret = component_bind_all(dev, ddev); + ret = msm_init_vram(ddev); if (ret) goto err_destroy_mdss; - ret = msm_init_vram(ddev); + /* Bind all our sub-components: */ + ret = component_bind_all(dev, ddev); if (ret) - goto err_msm_uninit; + goto err_destroy_mdss; dma_set_max_seg_size(dev, UINT_MAX); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index b9dd8f8f488724..0b2686b060c731 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -423,6 +423,11 @@ static inline int msm_dp_display_disable(struct msm_dp *dp, { return -EINVAL; } +static inline int msm_dp_display_pre_disable(struct msm_dp *dp, + struct drm_encoder *encoder) +{ + return -EINVAL; +} static inline void msm_dp_display_mode_set(struct msm_dp *dp, struct drm_encoder *encoder, struct drm_display_mode *mode, diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index 35122aef037b4a..17f26052e84508 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -134,11 +134,8 @@ static int mxsfb_attach_bridge(struct mxsfb_drm_private *mxsfb) return -ENODEV; ret = drm_bridge_attach(&mxsfb->encoder, bridge, NULL, 0); - if (ret) { - DRM_DEV_ERROR(drm->dev, - "failed to attach bridge: %d\n", ret); - return ret; - } + if (ret) + return dev_err_probe(drm->dev, ret, "Failed to attach bridge\n"); mxsfb->bridge = bridge; @@ -212,7 +209,8 @@ static int mxsfb_load(struct drm_device *drm, ret = mxsfb_attach_bridge(mxsfb); if (ret) { - dev_err(drm->dev, "Cannot connect bridge: %d\n", ret); + if 
(ret != -EPROBE_DEFER) + dev_err(drm->dev, "Cannot connect bridge: %d\n", ret); goto err_vblank; } diff --git a/drivers/gpu/drm/nouveau/dispnv50/base507c.c b/drivers/gpu/drm/nouveau/dispnv50/base507c.c index 302d4e6fc52f1a..788db043a34299 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/base507c.c +++ b/drivers/gpu/drm/nouveau/dispnv50/base507c.c @@ -88,7 +88,11 @@ base507c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) NVVAL(NV507C, SET_CONVERSION, OFS, 0x64)); } else { PUSH_MTHD(push, NV507C, SET_PROCESSING, - NVDEF(NV507C, SET_PROCESSING, USE_GAIN_OFS, DISABLE)); + NVDEF(NV507C, SET_PROCESSING, USE_GAIN_OFS, DISABLE), + + SET_CONVERSION, + NVVAL(NV507C, SET_CONVERSION, GAIN, 0) | + NVVAL(NV507C, SET_CONVERSION, OFS, 0)); } PUSH_MTHD(push, NV507C, SURFACE_SET_OFFSET(0, 0), asyw->image.offset[0] >> 8); diff --git a/drivers/gpu/drm/nouveau/dispnv50/base827c.c b/drivers/gpu/drm/nouveau/dispnv50/base827c.c index 18d34096f1258f..093d4ba6910ec4 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/base827c.c +++ b/drivers/gpu/drm/nouveau/dispnv50/base827c.c @@ -49,7 +49,11 @@ base827c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) NVVAL(NV827C, SET_CONVERSION, OFS, 0x64)); } else { PUSH_MTHD(push, NV827C, SET_PROCESSING, - NVDEF(NV827C, SET_PROCESSING, USE_GAIN_OFS, DISABLE)); + NVDEF(NV827C, SET_PROCESSING, USE_GAIN_OFS, DISABLE), + + SET_CONVERSION, + NVVAL(NV827C, SET_CONVERSION, GAIN, 0) | + NVVAL(NV827C, SET_CONVERSION, OFS, 0)); } PUSH_MTHD(push, NV827C, SURFACE_SET_OFFSET(0, 0), asyw->image.offset[0] >> 8, diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 36d6b6093d16d9..5b8cabb099eb1a 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -221,7 +221,7 @@ nv50_dmac_wait(struct nvif_push *push, u32 size) int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, - const s32 *oclass, u8 head, void *data, u32 size, u64 syncbuf, + 
const s32 *oclass, u8 head, void *data, u32 size, s64 syncbuf, struct nv50_dmac *dmac) { struct nouveau_cli *cli = (void *)device->object.client; @@ -270,7 +270,7 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, if (ret) return ret; - if (!syncbuf) + if (syncbuf < 0) return 0; ret = nvif_object_ctor(&dmac->base.user, "kmsSyncCtxDma", NV50_DISP_HANDLE_SYNCBUF, diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h index 92bddc0836171b..38dec11e7dda55 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h @@ -95,7 +95,7 @@ struct nv50_outp_atom { int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, const s32 *oclass, u8 head, void *data, u32 size, - u64 syncbuf, struct nv50_dmac *dmac); + s64 syncbuf, struct nv50_dmac *dmac); void nv50_dmac_destroy(struct nv50_dmac *); /* diff --git a/drivers/gpu/drm/nouveau/dispnv50/head917d.c b/drivers/gpu/drm/nouveau/dispnv50/head917d.c index a5d8274036609c..ea9f8667305ecb 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head917d.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head917d.c @@ -22,6 +22,7 @@ #include "head.h" #include "core.h" +#include "nvif/push.h" #include #include @@ -73,6 +74,31 @@ head917d_base(struct nv50_head *head, struct nv50_head_atom *asyh) return 0; } +static int +head917d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nvif_push *push = nv50_disp(head->base.base.dev)->core->chan.push; + const int i = head->base.index; + int ret; + + ret = PUSH_WAIT(push, 5); + if (ret) + return ret; + + PUSH_MTHD(push, NV917D, HEAD_SET_CONTROL_CURSOR(i), + NVDEF(NV917D, HEAD_SET_CONTROL_CURSOR, ENABLE, ENABLE) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, FORMAT, asyh->curs.format) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, SIZE, asyh->curs.layout) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, HOT_SPOT_X, 0) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, HOT_SPOT_Y, 0) | + 
NVDEF(NV917D, HEAD_SET_CONTROL_CURSOR, COMPOSITION, ALPHA_BLEND), + + HEAD_SET_OFFSET_CURSOR(i), asyh->curs.offset >> 8); + + PUSH_MTHD(push, NV917D, HEAD_SET_CONTEXT_DMA_CURSOR(i), asyh->curs.handle); + return 0; +} + int head917d_curs_layout(struct nv50_head *head, struct nv50_wndw_atom *asyw, struct nv50_head_atom *asyh) @@ -101,7 +127,7 @@ head917d = { .core_clr = head907d_core_clr, .curs_layout = head917d_curs_layout, .curs_format = head507d_curs_format, - .curs_set = head907d_curs_set, + .curs_set = head917d_curs_set, .curs_clr = head907d_curs_clr, .base = head917d_base, .ovly = head907d_ovly, diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c index 685b7087132426..b390029c69ec13 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c @@ -76,7 +76,7 @@ wimmc37b_init_(const struct nv50_wimm_func *func, struct nouveau_drm *drm, int ret; ret = nv50_dmac_create(&drm->client.device, &disp->disp->object, - &oclass, 0, &args, sizeof(args), 0, + &oclass, 0, &args, sizeof(args), -1, &wndw->wimm); if (ret) { NV_ERROR(drm, "wimm%04x allocation failed: %d\n", oclass, ret); diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 0356474ad6f6af..f07916ffe42cb2 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -702,6 +702,11 @@ nv50_wndw_init(struct nv50_wndw *wndw) nvif_notify_get(&wndw->notify); } +static const u64 nv50_cursor_format_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID, +}; + int nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, enum drm_plane_type type, const char *name, int index, @@ -713,6 +718,7 @@ nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, struct nvif_mmu *mmu = &drm->client.mmu; struct nv50_disp *disp = nv50_disp(dev); struct nv50_wndw *wndw; + const u64 *format_modifiers; int nformat; 
int ret; @@ -728,10 +734,13 @@ nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, for (nformat = 0; format[nformat]; nformat++); - ret = drm_universal_plane_init(dev, &wndw->plane, heads, &nv50_wndw, - format, nformat, - nouveau_display(dev)->format_modifiers, - type, "%s-%d", name, index); + if (type == DRM_PLANE_TYPE_CURSOR) + format_modifiers = nv50_cursor_format_modifiers; + else + format_modifiers = nouveau_display(dev)->format_modifiers; + + ret = drm_universal_plane_init(dev, &wndw->plane, heads, &nv50_wndw, format, nformat, + format_modifiers, type, "%s-%d", name, index); if (ret) { kfree(*pwndw); *pwndw = NULL; diff --git a/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h b/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h index 2a2612d6e1e0e6..fb223723a38add 100644 --- a/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h +++ b/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h @@ -66,6 +66,10 @@ #define NV917D_HEAD_SET_CONTROL_CURSOR_COMPOSITION_ALPHA_BLEND (0x00000000) #define NV917D_HEAD_SET_CONTROL_CURSOR_COMPOSITION_PREMULT_ALPHA_BLEND (0x00000001) #define NV917D_HEAD_SET_CONTROL_CURSOR_COMPOSITION_XOR (0x00000002) +#define NV917D_HEAD_SET_OFFSET_CURSOR(a) (0x00000484 + (a)*0x00000300) +#define NV917D_HEAD_SET_OFFSET_CURSOR_ORIGIN 31:0 +#define NV917D_HEAD_SET_CONTEXT_DMA_CURSOR(a) (0x0000048C + (a)*0x00000300) +#define NV917D_HEAD_SET_CONTEXT_DMA_CURSOR_HANDLE 31:0 #define NV917D_HEAD_SET_DITHER_CONTROL(a) (0x000004A0 + (a)*0x00000300) #define NV917D_HEAD_SET_DITHER_CONTROL_ENABLE 0:0 #define NV917D_HEAD_SET_DITHER_CONTROL_ENABLE_DISABLE (0x00000000) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 4f69e4c3dafde2..1c3f890377d2c2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -315,6 +315,10 @@ nouveau_svmm_init(struct drm_device *dev, void *data, struct drm_nouveau_svm_init *args = data; int ret; + /* We need to fail if svm 
is disabled */ + if (!cli->drm->svm) + return -ENOSYS; + /* Allocate tracking for SVM-enabled VMM. */ if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL))) return -ENOMEM; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 7deb81b6dbac6b..4b571cc6bc70f4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -75,7 +75,7 @@ shadow_image(struct nvkm_bios *bios, int idx, u32 offset, struct shadow *mthd) nvkm_debug(subdev, "%08x: type %02x, %d bytes\n", image.base, image.type, image.size); - if (!shadow_fetch(bios, mthd, image.size)) { + if (!shadow_fetch(bios, mthd, image.base + image.size)) { nvkm_debug(subdev, "%08x: fetch failed\n", image.base); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c index edb6148cbca042..d0e80ad526845f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c @@ -33,7 +33,7 @@ static void gm200_i2c_aux_fini(struct gm200_i2c_aux *aux) { struct nvkm_device *device = aux->base.pad->i2c->subdev.device; - nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00310000, 0x00000000); + nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00710000, 0x00000000); } static int @@ -54,10 +54,10 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux) AUX_ERR(&aux->base, "begin idle timeout %08x", ctrl); return -EBUSY; } - } while (ctrl & 0x03010000); + } while (ctrl & 0x07010000); /* set some magic, and wait up to 1ms for it to appear */ - nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00300000, ureq); + nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00700000, ureq); timeout = 1000; do { ctrl = nvkm_rd32(device, 0x00d954 + (aux->ch * 0x50)); @@ -67,7 +67,7 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux) gm200_i2c_aux_fini(aux); return -EBUSY; } - } while ((ctrl & 0x03000000) != urep); + } 
while ((ctrl & 0x07000000) != urep); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c index 2340040942c937..1115376bc85f5f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "priv.h" +#include static void gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i) @@ -31,7 +32,6 @@ gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0400)); nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x122128 + (i * 0x0400), 0x00000200, 0x00000000); } static void @@ -42,7 +42,6 @@ gf100_ibus_intr_rop(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0400)); nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x124128 + (i * 0x0400), 0x00000200, 0x00000000); } static void @@ -53,7 +52,6 @@ gf100_ibus_intr_gpc(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0400)); nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x128128 + (i * 0x0400), 0x00000200, 0x00000000); } void @@ -90,6 +88,12 @@ gf100_ibus_intr(struct nvkm_subdev *ibus) intr1 &= ~stat; } } + + nvkm_mask(device, 0x121c4c, 0x0000003f, 0x00000002); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x121c4c) & 0x0000003f)) + break; + ); } static int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c index f3915f85838ed4..22e487b493ad13 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ 
#include "priv.h" +#include static void gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i) @@ -31,7 +32,6 @@ gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0800)); nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x122128 + (i * 0x0800), 0x00000200, 0x00000000); } static void @@ -42,7 +42,6 @@ gk104_ibus_intr_rop(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0800)); nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x124128 + (i * 0x0800), 0x00000200, 0x00000000); } static void @@ -53,7 +52,6 @@ gk104_ibus_intr_gpc(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0800)); nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x128128 + (i * 0x0800), 0x00000200, 0x00000000); } void @@ -90,6 +88,12 @@ gk104_ibus_intr(struct nvkm_subdev *ibus) intr1 &= ~stat; } } + + nvkm_mask(device, 0x12004c, 0x0000003f, 0x00000002); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x12004c) & 0x0000003f)) + break; + ); } static int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index de91e9a2617258..6d5212ae2fd57b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -316,9 +316,9 @@ nvkm_mmu_vram(struct nvkm_mmu *mmu) { struct nvkm_device *device = mmu->subdev.device; struct nvkm_mm *mm = &device->fb->ram->vram; - const u32 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL); - const u32 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP); - const u32 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED); + const u64 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL); + const u64 
sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP); + const u64 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED); u8 type = NVKM_MEM_KIND * !!mmu->func->kind; u8 heap = NVKM_MEM_VRAM; int heapM, heapN, heapU; diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index 42ec51bb7b1b0c..7f431724881234 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -889,6 +889,7 @@ static int omap_dmm_probe(struct platform_device *dev) &omap_dmm->refill_pa, GFP_KERNEL); if (!omap_dmm->refill_va) { dev_err(&dev->dev, "could not allocate refill memory\n"); + ret = -ENOMEM; goto fail; } diff --git a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c index b6e377aa1131b7..6ac1accade803f 100644 --- a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c +++ b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c @@ -452,7 +452,7 @@ static int otm8009a_probe(struct mipi_dsi_device *dsi) dsi->lanes = 2; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_LPM; + MIPI_DSI_MODE_LPM | MIPI_DSI_CLOCK_NON_CONTINUOUS; drm_panel_init(&ctx->panel, dev, &otm8009a_drm_funcs, DRM_MODE_CONNECTOR_DSI); diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 2be358fb46f7d0..204674fccd6465 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1327,6 +1327,7 @@ static const struct drm_display_mode boe_nv133fhm_n61_modes = { .vsync_start = 1080 + 3, .vsync_end = 1080 + 3 + 6, .vtotal = 1080 + 3 + 6 + 31, + .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC, }; /* Also used for boe_nv133fhm_n62 */ diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c index e6896733838ab4..bf7c34cfb84c0c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.c +++ 
b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -206,7 +206,6 @@ int panfrost_device_init(struct panfrost_device *pfdev) struct resource *res; mutex_init(&pfdev->sched_lock); - mutex_init(&pfdev->reset_lock); INIT_LIST_HEAD(&pfdev->scheduled_jobs); INIT_LIST_HEAD(&pfdev->as_lru_list); diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index 2e9cbd1c4a58ee..597cf1459b0a8d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -88,6 +88,7 @@ struct panfrost_device { /* pm_domains for devices with more than one. */ struct device *pm_domain_devs[MAX_PM_DOMAINS]; struct device_link *pm_domain_links[MAX_PM_DOMAINS]; + bool coherent; struct panfrost_features features; const struct panfrost_compatible *comp; @@ -105,7 +106,11 @@ struct panfrost_device { struct panfrost_perfcnt *perfcnt; struct mutex sched_lock; - struct mutex reset_lock; + + struct { + struct work_struct work; + atomic_t pending; + } reset; struct mutex shrinker_lock; struct list_head shrinker_list; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 0fc084110e5ba0..689be734ed200e 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -587,6 +587,8 @@ static int panfrost_probe(struct platform_device *pdev) if (!pfdev->comp) return -ENODEV; + pfdev->coherent = device_get_dma_attr(&pdev->dev) == DEV_DMA_COHERENT; + /* Allocate and initialze the DRM device. 
*/ ddev = drm_dev_alloc(&panfrost_drm_driver, &pdev->dev); if (IS_ERR(ddev)) diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 62d4d710a57118..57a31dd0ffed19 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -218,6 +218,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = { */ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size) { + struct panfrost_device *pfdev = dev->dev_private; struct panfrost_gem_object *obj; obj = kzalloc(sizeof(*obj), GFP_KERNEL); @@ -227,6 +228,7 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t INIT_LIST_HEAD(&obj->mappings.list); mutex_init(&obj->mappings.lock); obj->base.base.funcs = &panfrost_gem_funcs; + obj->base.map_cached = pfdev->coherent; return &obj->base.base; } diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 30e7b7196dab05..04e6f6f9b742ef 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -20,12 +20,22 @@ #include "panfrost_gpu.h" #include "panfrost_mmu.h" +#define JOB_TIMEOUT_MS 500 + #define job_write(dev, reg, data) writel(data, dev->iomem + (reg)) #define job_read(dev, reg) readl(dev->iomem + (reg)) +enum panfrost_queue_status { + PANFROST_QUEUE_STATUS_ACTIVE, + PANFROST_QUEUE_STATUS_STOPPED, + PANFROST_QUEUE_STATUS_STARTING, + PANFROST_QUEUE_STATUS_FAULT_PENDING, +}; + struct panfrost_queue_state { struct drm_gpu_scheduler sched; - + atomic_t status; + struct mutex lock; u64 fence_context; u64 emit_seqno; }; @@ -369,13 +379,64 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) job_write(pfdev, JOB_INT_MASK, irq_mask); } +static bool panfrost_scheduler_stop(struct panfrost_queue_state *queue, + struct drm_sched_job *bad) +{ + enum panfrost_queue_status old_status; + bool stopped = false; + + mutex_lock(&queue->lock); + 
old_status = atomic_xchg(&queue->status, + PANFROST_QUEUE_STATUS_STOPPED); + if (old_status == PANFROST_QUEUE_STATUS_STOPPED) + goto out; + + WARN_ON(old_status != PANFROST_QUEUE_STATUS_ACTIVE); + drm_sched_stop(&queue->sched, bad); + if (bad) + drm_sched_increase_karma(bad); + + stopped = true; + + /* + * Set the timeout to max so the timer doesn't get started + * when we return from the timeout handler (restored in + * panfrost_scheduler_start()). + */ + queue->sched.timeout = MAX_SCHEDULE_TIMEOUT; + +out: + mutex_unlock(&queue->lock); + + return stopped; +} + +static void panfrost_scheduler_start(struct panfrost_queue_state *queue) +{ + enum panfrost_queue_status old_status; + + mutex_lock(&queue->lock); + old_status = atomic_xchg(&queue->status, + PANFROST_QUEUE_STATUS_STARTING); + WARN_ON(old_status != PANFROST_QUEUE_STATUS_STOPPED); + + /* Restore the original timeout before starting the scheduler. */ + queue->sched.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS); + drm_sched_resubmit_jobs(&queue->sched); + drm_sched_start(&queue->sched, true); + old_status = atomic_xchg(&queue->status, + PANFROST_QUEUE_STATUS_ACTIVE); + if (old_status == PANFROST_QUEUE_STATUS_FAULT_PENDING) + drm_sched_fault(&queue->sched); + + mutex_unlock(&queue->lock); +} + static void panfrost_job_timedout(struct drm_sched_job *sched_job) { struct panfrost_job *job = to_panfrost_job(sched_job); struct panfrost_device *pfdev = job->pfdev; int js = panfrost_job_get_slot(job); - unsigned long flags; - int i; /* * If the GPU managed to complete this jobs fence, the timeout is @@ -392,40 +453,13 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job) job_read(pfdev, JS_TAIL_LO(js)), sched_job); - if (!mutex_trylock(&pfdev->reset_lock)) + /* Scheduler is already stopped, nothing to do. 
*/ + if (!panfrost_scheduler_stop(&pfdev->js->queue[js], sched_job)) return; - for (i = 0; i < NUM_JOB_SLOTS; i++) { - struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched; - - drm_sched_stop(sched, sched_job); - if (js != i) - /* Ensure any timeouts on other slots have finished */ - cancel_delayed_work_sync(&sched->work_tdr); - } - - drm_sched_increase_karma(sched_job); - - spin_lock_irqsave(&pfdev->js->job_lock, flags); - for (i = 0; i < NUM_JOB_SLOTS; i++) { - if (pfdev->jobs[i]) { - pm_runtime_put_noidle(pfdev->dev); - panfrost_devfreq_record_idle(&pfdev->pfdevfreq); - pfdev->jobs[i] = NULL; - } - } - spin_unlock_irqrestore(&pfdev->js->job_lock, flags); - - panfrost_device_reset(pfdev); - - for (i = 0; i < NUM_JOB_SLOTS; i++) - drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched); - - /* restart scheduler after GPU is usable again */ - for (i = 0; i < NUM_JOB_SLOTS; i++) - drm_sched_start(&pfdev->js->queue[i].sched, true); - - mutex_unlock(&pfdev->reset_lock); + /* Schedule a reset if there's no reset in progress. */ + if (!atomic_xchg(&pfdev->reset.pending, 1)) + schedule_work(&pfdev->reset.work); } static const struct drm_sched_backend_ops panfrost_sched_ops = { @@ -457,6 +491,8 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data) job_write(pfdev, JOB_INT_CLEAR, mask); if (status & JOB_INT_MASK_ERR(j)) { + enum panfrost_queue_status old_status; + job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP); dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x", @@ -465,7 +501,18 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data) job_read(pfdev, JS_HEAD_LO(j)), job_read(pfdev, JS_TAIL_LO(j))); - drm_sched_fault(&pfdev->js->queue[j].sched); + /* + * When the queue is being restarted we don't report + * faults directly to avoid races between the timeout + * and reset handlers. panfrost_scheduler_start() will + * call drm_sched_fault() after the queue has been + * started if status == FAULT_PENDING. 
+ */ + old_status = atomic_cmpxchg(&pfdev->js->queue[j].status, + PANFROST_QUEUE_STATUS_STARTING, + PANFROST_QUEUE_STATUS_FAULT_PENDING); + if (old_status == PANFROST_QUEUE_STATUS_ACTIVE) + drm_sched_fault(&pfdev->js->queue[j].sched); } if (status & JOB_INT_MASK_DONE(j)) { @@ -492,11 +539,66 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data) return IRQ_HANDLED; } +static void panfrost_reset(struct work_struct *work) +{ + struct panfrost_device *pfdev = container_of(work, + struct panfrost_device, + reset.work); + unsigned long flags; + unsigned int i; + bool cookie; + + cookie = dma_fence_begin_signalling(); + for (i = 0; i < NUM_JOB_SLOTS; i++) { + /* + * We want pending timeouts to be handled before we attempt + * to stop the scheduler. If we don't do that and the timeout + * handler is in flight, it might have removed the bad job + * from the list, and we'll lose this job if the reset handler + * enters the critical section in panfrost_scheduler_stop() + * before the timeout handler. + * + * Timeout is set to MAX_SCHEDULE_TIMEOUT - 1 because we need + * something big enough to make sure the timer will not expire + * before we manage to stop the scheduler, but we can't use + * MAX_SCHEDULE_TIMEOUT because drm_sched_get_cleanup_job() + * considers that as 'timer is not running' and will dequeue + * the job without making sure the timeout handler is not + * running. + */ + pfdev->js->queue[i].sched.timeout = MAX_SCHEDULE_TIMEOUT - 1; + cancel_delayed_work_sync(&pfdev->js->queue[i].sched.work_tdr); + panfrost_scheduler_stop(&pfdev->js->queue[i], NULL); + } + + /* All timers have been stopped, we can safely reset the pending state. 
*/ + atomic_set(&pfdev->reset.pending, 0); + + spin_lock_irqsave(&pfdev->js->job_lock, flags); + for (i = 0; i < NUM_JOB_SLOTS; i++) { + if (pfdev->jobs[i]) { + pm_runtime_put_noidle(pfdev->dev); + panfrost_devfreq_record_idle(&pfdev->pfdevfreq); + pfdev->jobs[i] = NULL; + } + } + spin_unlock_irqrestore(&pfdev->js->job_lock, flags); + + panfrost_device_reset(pfdev); + + for (i = 0; i < NUM_JOB_SLOTS; i++) + panfrost_scheduler_start(&pfdev->js->queue[i]); + + dma_fence_end_signalling(cookie); +} + int panfrost_job_init(struct panfrost_device *pfdev) { struct panfrost_job_slot *js; int ret, j, irq; + INIT_WORK(&pfdev->reset.work, panfrost_reset); + pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL); if (!js) return -ENOMEM; @@ -515,11 +617,13 @@ int panfrost_job_init(struct panfrost_device *pfdev) } for (j = 0; j < NUM_JOB_SLOTS; j++) { + mutex_init(&js->queue[j].lock); + js->queue[j].fence_context = dma_fence_context_alloc(1); ret = drm_sched_init(&js->queue[j].sched, &panfrost_sched_ops, - 1, 0, msecs_to_jiffies(500), + 1, 0, msecs_to_jiffies(JOB_TIMEOUT_MS), "pan_js"); if (ret) { dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret); @@ -545,8 +649,10 @@ void panfrost_job_fini(struct panfrost_device *pfdev) job_write(pfdev, JOB_INT_MASK, 0); - for (j = 0; j < NUM_JOB_SLOTS; j++) + for (j = 0; j < NUM_JOB_SLOTS; j++) { drm_sched_fini(&js->queue[j].sched); + mutex_destroy(&js->queue[j].lock); + } } diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 776448c527ea9c..be8d68fb0e11e2 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -371,6 +371,7 @@ int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv) .pgsize_bitmap = SZ_4K | SZ_2M, .ias = FIELD_GET(0xff, pfdev->features.mmu_features), .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), + .coherent_walk = pfdev->coherent, .tlb = &mmu_tlb_ops, .iommu_dev = pfdev->dev, }; diff --git 
a/drivers/gpu/drm/tve200/tve200_drv.c b/drivers/gpu/drm/tve200/tve200_drv.c index c3aa39bd38ecdf..b5259cb1383fc2 100644 --- a/drivers/gpu/drm/tve200/tve200_drv.c +++ b/drivers/gpu/drm/tve200/tve200_drv.c @@ -200,8 +200,8 @@ static int tve200_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (!irq) { - ret = -EINVAL; + if (irq < 0) { + ret = irq; goto clk_disable; } diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index fef43f4e3bac4f..edcfd8c120c44c 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -303,8 +303,10 @@ static int udl_handle_damage(struct drm_framebuffer *fb, int x, int y, } urb = udl_get_urb(dev); - if (!urb) + if (!urb) { + ret = -ENOMEM; goto out_drm_gem_shmem_vunmap; + } cmd = urb->transfer_buffer; for (i = clip.y1; i < clip.y2; i++) { diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index afc178b0d89f47..eaba98e15de468 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1268,6 +1268,7 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) card->dai_link = dai_link; card->num_links = 1; card->name = vc4_hdmi->variant->card_name; + card->driver_name = "vc4-hdmi"; card->dev = dev; card->owner = THIS_MODULE; diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index b72b2bd05a815c..ad691571d759f9 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -618,11 +618,11 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) * for now we just allocate globally. */ if (!hvs->hvs5) - /* 96kB */ - drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024); + /* 48k words of 2x12-bit pixels */ + drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024); else - /* 70k words */ - drm_mm_init(&hvs->lbm_mm, 0, 70 * 2 * 1024); + /* 60k words of 4x12-bit pixels */ + drm_mm_init(&hvs->lbm_mm, 0, 60 * 1024); /* Upload filter kernels. 
We only have the one for now, so we * keep it around for the lifetime of the driver. diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 6b39cc2ca18d09..5612cab552270d 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -437,6 +437,7 @@ static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) static u32 vc4_lbm_size(struct drm_plane_state *state) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); u32 pix_per_line; u32 lbm; @@ -472,7 +473,11 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) lbm = pix_per_line * 16; } - lbm = roundup(lbm, 32); + /* Align it to 64 or 128 (hvs5) bytes */ + lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64); + + /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */ + lbm /= vc4->hvs->hvs5 ? 4 : 2; return lbm; } @@ -912,9 +917,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane, if (!vc4_state->is_unity) { vc4_dlist_write(vc4_state, VC4_SET_FIELD(vc4_state->crtc_w, - SCALER_POS1_SCL_WIDTH) | + SCALER5_POS1_SCL_WIDTH) | VC4_SET_FIELD(vc4_state->crtc_h, - SCALER_POS1_SCL_HEIGHT)); + SCALER5_POS1_SCL_HEIGHT)); } /* Position Word 2: Source Image Size */ diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 612629678c8457..9b56226ce0d1c7 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -899,6 +899,7 @@ config HID_SONY depends on NEW_LEDS depends on LEDS_CLASS select POWER_SUPPLY + select CRC32 help Support for diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index f170feaac40bae..94180c63571edb 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -387,6 +387,7 @@ #define USB_DEVICE_ID_TOSHIBA_CLICK_L9W 0x0401 #define USB_DEVICE_ID_HP_X2 0x074d #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755 +#define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff 
--git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 4dca1139245934..32024905fd70f5 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -322,6 +322,8 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), HID_BATTERY_QUIRK_IGNORE }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), + HID_BATTERY_QUIRK_IGNORE }, {} }; diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 1ffcfc9a1e033b..45e7e0bdd382bd 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1869,6 +1869,10 @@ static const struct hid_device_id logi_dj_receivers[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xc531), .driver_data = recvr_type_gaming_hidpp}, + { /* Logitech G602 receiver (0xc537) */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, + 0xc537), + .driver_data = recvr_type_gaming_hidpp}, { /* Logitech lightspeed receiver (0xc539) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1), diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 0ca7231195473e..74ebfb12c360e0 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4051,6 +4051,8 @@ static const struct hid_device_id hidpp_devices[] = { { /* MX Master mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, + { /* MX Ergo trackball over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, { /* MX Master 3 mouse over Bluetooth */ diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index d670bcd57bdef8..8429ebe7097e4c 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -758,7 +758,8 @@ static int 
mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, MT_STORE_FIELD(inrange_state); return 1; case HID_DG_CONFIDENCE: - if (cls->name == MT_CLS_WIN_8 && + if ((cls->name == MT_CLS_WIN_8 || + cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT) && (field->application == HID_DG_TOUCHPAD || field->application == HID_DG_TOUCHSCREEN)) app->quirks |= MT_QUIRK_CONFIDENCE; @@ -2054,6 +2055,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xce09) }, + /* TopSeed panels */ { .driver_data = MT_CLS_TOPSEED, MT_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index 35f3bfc3e6f591..8e0f67455c0988 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -405,6 +405,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Vero K147", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VERO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "K147"), + }, + .driver_data = (void *)&sipodev_desc + }, { } /* Terminate list */ }; diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index cd71e713394464..73dafa60080f1f 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -147,9 +147,9 @@ static int wacom_wac_pen_serial_enforce(struct hid_device *hdev, } if (flush) - wacom_wac_queue_flush(hdev, &wacom_wac->pen_fifo); + wacom_wac_queue_flush(hdev, wacom_wac->pen_fifo); else if (insert) - wacom_wac_queue_insert(hdev, &wacom_wac->pen_fifo, + wacom_wac_queue_insert(hdev, wacom_wac->pen_fifo, raw_data, report_size); return insert && !flush; @@ -1270,6 +1270,38 @@ static int wacom_devm_sysfs_create_group(struct wacom *wacom, group); } +static void 
wacom_devm_kfifo_release(struct device *dev, void *res) +{ + struct kfifo_rec_ptr_2 *devres = res; + + kfifo_free(devres); +} + +static int wacom_devm_kfifo_alloc(struct wacom *wacom) +{ + struct wacom_wac *wacom_wac = &wacom->wacom_wac; + struct kfifo_rec_ptr_2 *pen_fifo; + int error; + + pen_fifo = devres_alloc(wacom_devm_kfifo_release, + sizeof(struct kfifo_rec_ptr_2), + GFP_KERNEL); + + if (!pen_fifo) + return -ENOMEM; + + error = kfifo_alloc(pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); + if (error) { + devres_free(pen_fifo); + return error; + } + + devres_add(&wacom->hdev->dev, pen_fifo); + wacom_wac->pen_fifo = pen_fifo; + + return 0; +} + enum led_brightness wacom_leds_brightness_get(struct wacom_led *led) { struct wacom *wacom = led->wacom; @@ -2724,7 +2756,7 @@ static int wacom_probe(struct hid_device *hdev, if (features->check_for_hid_type && features->hid_type != hdev->type) return -ENODEV; - error = kfifo_alloc(&wacom_wac->pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); + error = wacom_devm_kfifo_alloc(wacom); if (error) return error; @@ -2786,8 +2818,6 @@ static void wacom_remove(struct hid_device *hdev) if (wacom->wacom_wac.features.type != REMOTE) wacom_release_resources(wacom); - - kfifo_free(&wacom_wac->pen_fifo); } #ifdef CONFIG_PM diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index da612b6e9c7796..195910dd2154e2 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -342,7 +342,7 @@ struct wacom_wac { struct input_dev *pen_input; struct input_dev *touch_input; struct input_dev *pad_input; - struct kfifo_rec_ptr_2 pen_fifo; + struct kfifo_rec_ptr_2 *pen_fifo; int pid; int num_contacts_left; u8 bt_features; diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c index fa69b94debd9b5..7596dc16464843 100644 --- a/drivers/hsi/controllers/omap_ssi_core.c +++ b/drivers/hsi/controllers/omap_ssi_core.c @@ -355,7 +355,7 @@ static int ssi_add_controller(struct hsi_controller *ssi, err = 
ida_simple_get(&platform_omap_ssi_ida, 0, 0, GFP_KERNEL); if (err < 0) - goto out_err; + return err; ssi->id = err; ssi->owner = THIS_MODULE; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 4fad3e6745e53f..a5a402e776c77f 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2542,7 +2542,6 @@ static void hv_kexec_handler(void) /* Make sure conn_state is set as hv_synic_cleanup checks for it */ mb(); cpuhp_remove_state(hyperv_cpuhp_online); - hyperv_cleanup(); }; static void hv_crash_handler(struct pt_regs *regs) @@ -2558,7 +2557,6 @@ static void hv_crash_handler(struct pt_regs *regs) cpu = smp_processor_id(); hv_stimer_cleanup(cpu); hv_synic_disable_regs(cpu); - hyperv_cleanup(); }; static int hv_synic_suspend(void) diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c index 3197cda7bcd9fc..f22154863c98a0 100644 --- a/drivers/hwmon/amd_energy.c +++ b/drivers/hwmon/amd_energy.c @@ -222,7 +222,7 @@ static int amd_create_sensor(struct device *dev, */ cpus = num_present_cpus() / num_siblings; - s_config = devm_kcalloc(dev, cpus + sockets, + s_config = devm_kcalloc(dev, cpus + sockets + 1, sizeof(u32), GFP_KERNEL); if (!s_config) return -ENOMEM; @@ -254,6 +254,7 @@ static int amd_create_sensor(struct device *dev, scnprintf(label_l[i], 10, "Esocket%u", (i - cpus)); } + s_config[i] = 0; return 0; } diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index 41fb17e0d6416b..ad11cbddc3a7b1 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -489,7 +489,7 @@ static int ina3221_write_enable(struct device *dev, int channel, bool enable) /* For enabling routine, increase refcount and resume() at first */ if (enable) { - ret = pm_runtime_get_sync(ina->pm_dev); + ret = pm_runtime_resume_and_get(ina->pm_dev); if (ret < 0) { dev_err(dev, "Failed to get PM runtime\n"); return ret; diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index a250481b5a97f8..3bc2551577a30f 100644 --- 
a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -11,13 +11,6 @@ * convert raw register values is from https://github.com/ocerman/zenpower. * The information is not confirmed from chip datasheets, but experiments * suggest that it provides reasonable temperature values. - * - Register addresses to read chip voltage and current are also from - * https://github.com/ocerman/zenpower, and not confirmed from chip - * datasheets. Current calibration is board specific and not typically - * shared by board vendors. For this reason, current values are - * normalized to report 1A/LSB for core current and and 0.25A/LSB for SoC - * current. Reported values can be adjusted using the sensors configuration - * file. */ #include @@ -109,10 +102,7 @@ struct k10temp_data { int temp_offset; u32 temp_adjust_mask; u32 show_temp; - u32 svi_addr[2]; bool is_zen; - bool show_current; - int cfactor[2]; }; #define TCTL_BIT 0 @@ -137,16 +127,6 @@ static const struct tctl_offset tctl_offset_table[] = { { 0x17, "AMD Ryzen Threadripper 29", 27000 }, /* 29{20,50,70,90}[W]X */ }; -static bool is_threadripper(void) -{ - return strstr(boot_cpu_data.x86_model_id, "Threadripper"); -} - -static bool is_epyc(void) -{ - return strstr(boot_cpu_data.x86_model_id, "EPYC"); -} - static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval) { pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval); @@ -211,16 +191,6 @@ static const char *k10temp_temp_label[] = { "Tccd8", }; -static const char *k10temp_in_label[] = { - "Vcore", - "Vsoc", -}; - -static const char *k10temp_curr_label[] = { - "Icore", - "Isoc", -}; - static int k10temp_read_labels(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) @@ -229,50 +199,6 @@ static int k10temp_read_labels(struct device *dev, case hwmon_temp: *str = k10temp_temp_label[channel]; break; - case hwmon_in: - *str = k10temp_in_label[channel]; - break; - case hwmon_curr: - *str = k10temp_curr_label[channel]; - break; 
- default: - return -EOPNOTSUPP; - } - return 0; -} - -static int k10temp_read_curr(struct device *dev, u32 attr, int channel, - long *val) -{ - struct k10temp_data *data = dev_get_drvdata(dev); - u32 regval; - - switch (attr) { - case hwmon_curr_input: - amd_smn_read(amd_pci_dev_to_node_id(data->pdev), - data->svi_addr[channel], ®val); - *val = DIV_ROUND_CLOSEST(data->cfactor[channel] * - (regval & 0xff), - 1000); - break; - default: - return -EOPNOTSUPP; - } - return 0; -} - -static int k10temp_read_in(struct device *dev, u32 attr, int channel, long *val) -{ - struct k10temp_data *data = dev_get_drvdata(dev); - u32 regval; - - switch (attr) { - case hwmon_in_input: - amd_smn_read(amd_pci_dev_to_node_id(data->pdev), - data->svi_addr[channel], ®val); - regval = (regval >> 16) & 0xff; - *val = DIV_ROUND_CLOSEST(155000 - regval * 625, 100); - break; default: return -EOPNOTSUPP; } @@ -331,10 +257,6 @@ static int k10temp_read(struct device *dev, enum hwmon_sensor_types type, switch (type) { case hwmon_temp: return k10temp_read_temp(dev, attr, channel, val); - case hwmon_in: - return k10temp_read_in(dev, attr, channel, val); - case hwmon_curr: - return k10temp_read_curr(dev, attr, channel, val); default: return -EOPNOTSUPP; } @@ -383,11 +305,6 @@ static umode_t k10temp_is_visible(const void *_data, return 0; } break; - case hwmon_in: - case hwmon_curr: - if (!data->show_current) - return 0; - break; default: return 0; } @@ -517,20 +434,10 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id) case 0x8: /* Zen+ */ case 0x11: /* Zen APU */ case 0x18: /* Zen+ APU */ - data->show_current = !is_threadripper() && !is_epyc(); - data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE0; - data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE1; - data->cfactor[0] = F17H_M01H_CFACTOR_ICORE; - data->cfactor[1] = F17H_M01H_CFACTOR_ISOC; k10temp_get_ccd_support(pdev, data, 4); break; case 0x31: /* Zen2 Threadripper */ case 0x71: /* Zen2 */ - data->show_current = 
!is_threadripper() && !is_epyc(); - data->cfactor[0] = F17H_M31H_CFACTOR_ICORE; - data->cfactor[1] = F17H_M31H_CFACTOR_ISOC; - data->svi_addr[0] = F17H_M31H_SVI_TEL_PLANE0; - data->svi_addr[1] = F17H_M31H_SVI_TEL_PLANE1; k10temp_get_ccd_support(pdev, data, 8); break; } @@ -542,11 +449,6 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id) switch (boot_cpu_data.x86_model) { case 0x0 ... 0x1: /* Zen3 */ - data->show_current = true; - data->svi_addr[0] = F19H_M01_SVI_TEL_PLANE0; - data->svi_addr[1] = F19H_M01_SVI_TEL_PLANE1; - data->cfactor[0] = F19H_M01H_CFACTOR_ICORE; - data->cfactor[1] = F19H_M01H_CFACTOR_ISOC; k10temp_get_ccd_support(pdev, data, 8); break; } diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 1f63807c0399eb..ec171f2b684a17 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -324,8 +324,18 @@ static int pwm_fan_probe(struct platform_device *pdev) ctx->pwm_value = MAX_PWM; - /* Set duty cycle to maximum allowed and enable PWM output */ pwm_init_state(ctx->pwm, &state); + /* + * __set_pwm assumes that MAX_PWM * (period - 1) fits into an unsigned + * long. Check this here to prevent the fan running at a too low + * frequency. 
+ */ + if (state.period > ULONG_MAX / MAX_PWM + 1) { + dev_err(dev, "Configured period too big\n"); + return -EINVAL; + } + + /* Set duty cycle to maximum allowed and enable PWM output */ state.duty_cycle = ctx->pwm->args.period - 1; state.enabled = true; diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index 99430f6cf5a5d1..a61313f320bda2 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -567,7 +567,7 @@ static int catu_probe(struct amba_device *adev, const struct amba_id *id) return ret; } -static int __exit catu_remove(struct amba_device *adev) +static int catu_remove(struct amba_device *adev) { struct catu_drvdata *drvdata = dev_get_drvdata(&adev->dev); diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c index d28eae93e55c8e..61dbc1afd8da50 100644 --- a/drivers/hwtracing/coresight/coresight-cti-core.c +++ b/drivers/hwtracing/coresight/coresight-cti-core.c @@ -836,7 +836,7 @@ static void cti_device_release(struct device *dev) if (drvdata->csdev_release) drvdata->csdev_release(dev); } -static int __exit cti_remove(struct amba_device *adev) +static int cti_remove(struct amba_device *adev) { struct cti_drvdata *drvdata = dev_get_drvdata(&adev->dev); diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 248cc82c838e72..0cf6f0b947b6f8 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -176,6 +176,7 @@ static int etb_enable_perf(struct coresight_device *csdev, void *data) unsigned long flags; struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); struct perf_output_handle *handle = data; + struct cs_buffers *buf = etm_perf_sink_config(handle); spin_lock_irqsave(&drvdata->spinlock, flags); @@ -186,7 +187,7 @@ static int etb_enable_perf(struct coresight_device 
*csdev, void *data) } /* Get a handle on the pid of the process to monitor */ - pid = task_pid_nr(handle->event->owner); + pid = buf->pid; if (drvdata->pid != -1 && drvdata->pid != pid) { ret = -EBUSY; @@ -383,6 +384,7 @@ static void *etb_alloc_buffer(struct coresight_device *csdev, if (!buf) return NULL; + buf->pid = task_pid_nr(event->owner); buf->snapshot = overwrite; buf->nr_pages = nr_pages; buf->data_pages = pages; @@ -801,7 +803,7 @@ static int etb_probe(struct amba_device *adev, const struct amba_id *id) return ret; } -static int __exit etb_remove(struct amba_device *adev) +static int etb_remove(struct amba_device *adev) { struct etb_drvdata *drvdata = dev_get_drvdata(&adev->dev); diff --git a/drivers/hwtracing/coresight/coresight-etm3x-core.c b/drivers/hwtracing/coresight/coresight-etm3x-core.c index 47f610b1c2b18a..5bf5a5a4ce6d15 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm3x-core.c @@ -902,14 +902,14 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id) return 0; } -static void __exit clear_etmdrvdata(void *info) +static void clear_etmdrvdata(void *info) { int cpu = *(int *)info; etmdrvdata[cpu] = NULL; } -static int __exit etm_remove(struct amba_device *adev) +static int etm_remove(struct amba_device *adev) { struct etm_drvdata *drvdata = dev_get_drvdata(&adev->dev); diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index abd706b216ac90..95b54b0a362520 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -124,8 +124,8 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) dev_err(etm_dev, "timeout while waiting for Idle Trace Status\n"); - - writel_relaxed(config->pe_sel, drvdata->base + TRCPROCSELR); + if (drvdata->nr_pe) + writel_relaxed(config->pe_sel, 
drvdata->base + TRCPROCSELR); writel_relaxed(config->cfg, drvdata->base + TRCCONFIGR); /* nothing specific implemented */ writel_relaxed(0x0, drvdata->base + TRCAUXCTLR); @@ -141,8 +141,9 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) writel_relaxed(config->viiectlr, drvdata->base + TRCVIIECTLR); writel_relaxed(config->vissctlr, drvdata->base + TRCVISSCTLR); - writel_relaxed(config->vipcssctlr, - drvdata->base + TRCVIPCSSCTLR); + if (drvdata->nr_pe_cmp) + writel_relaxed(config->vipcssctlr, + drvdata->base + TRCVIPCSSCTLR); for (i = 0; i < drvdata->nrseqstate - 1; i++) writel_relaxed(config->seq_ctrl[i], drvdata->base + TRCSEQEVRn(i)); @@ -187,13 +188,15 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) writeq_relaxed(config->ctxid_pid[i], drvdata->base + TRCCIDCVRn(i)); writel_relaxed(config->ctxid_mask0, drvdata->base + TRCCIDCCTLR0); - writel_relaxed(config->ctxid_mask1, drvdata->base + TRCCIDCCTLR1); + if (drvdata->numcidc > 4) + writel_relaxed(config->ctxid_mask1, drvdata->base + TRCCIDCCTLR1); for (i = 0; i < drvdata->numvmidc; i++) writeq_relaxed(config->vmid_val[i], drvdata->base + TRCVMIDCVRn(i)); writel_relaxed(config->vmid_mask0, drvdata->base + TRCVMIDCCTLR0); - writel_relaxed(config->vmid_mask1, drvdata->base + TRCVMIDCCTLR1); + if (drvdata->numvmidc > 4) + writel_relaxed(config->vmid_mask1, drvdata->base + TRCVMIDCCTLR1); if (!drvdata->skip_power_up) { /* @@ -779,7 +782,7 @@ static void etm4_init_arch_data(void *info) * LPOVERRIDE, bit[23] implementation supports * low-power state override */ - if (BMVAL(etmidr5, 23, 23)) + if (BMVAL(etmidr5, 23, 23) && (!drvdata->skip_power_up)) drvdata->lpoverride = true; else drvdata->lpoverride = false; @@ -1178,7 +1181,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) state = drvdata->save_state; state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR); - state->trcprocselr = readl(drvdata->base + TRCPROCSELR); + if (drvdata->nr_pe) + state->trcprocselr = readl(drvdata->base + 
TRCPROCSELR); state->trcconfigr = readl(drvdata->base + TRCCONFIGR); state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR); state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R); @@ -1194,7 +1198,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcvictlr = readl(drvdata->base + TRCVICTLR); state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR); state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR); - state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR); + if (drvdata->nr_pe_cmp) + state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR); state->trcvdctlr = readl(drvdata->base + TRCVDCTLR); state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR); state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR); @@ -1240,10 +1245,12 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcvmidcvr[i] = readq(drvdata->base + TRCVMIDCVRn(i)); state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0); - state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1); + if (drvdata->numcidc > 4) + state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1); state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0); - state->trcvmidcctlr1 = readl(drvdata->base + TRCVMIDCCTLR1); + if (drvdata->numvmidc > 4) + state->trcvmidcctlr1 = readl(drvdata->base + TRCVMIDCCTLR1); state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR); @@ -1283,7 +1290,8 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET); writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR); - writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR); + if (drvdata->nr_pe) + writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR); writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR); writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR); writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R); @@ -1299,7 +1307,8 @@ static void etm4_cpu_restore(struct 
etmv4_drvdata *drvdata) writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR); writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR); writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR); - writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR); + if (drvdata->nr_pe_cmp) + writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR); writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR); writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR); writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR); @@ -1350,10 +1359,12 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) drvdata->base + TRCVMIDCVRn(i)); writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0); - writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1); + if (drvdata->numcidc > 4) + writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1); writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0); - writel_relaxed(state->trcvmidcctlr1, drvdata->base + TRCVMIDCCTLR1); + if (drvdata->numvmidc > 4) + writel_relaxed(state->trcvmidcctlr1, drvdata->base + TRCVMIDCCTLR1); writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET); @@ -1559,14 +1570,14 @@ static struct amba_cs_uci_id uci_id_etm4[] = { } }; -static void __exit clear_etmdrvdata(void *info) +static void clear_etmdrvdata(void *info) { int cpu = *(int *)info; etmdrvdata[cpu] = NULL; } -static int __exit etm4_remove(struct amba_device *adev) +static int etm4_remove(struct amba_device *adev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(&adev->dev); diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index af40814ce5603d..3fc6c678b51d8e 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -274,7 +274,7 @@ static int funnel_probe(struct device *dev, struct resource *res) return ret; } -static int __exit 
funnel_remove(struct device *dev) +static int funnel_remove(struct device *dev) { struct funnel_drvdata *drvdata = dev_get_drvdata(dev); @@ -328,7 +328,7 @@ static int static_funnel_probe(struct platform_device *pdev) return ret; } -static int __exit static_funnel_remove(struct platform_device *pdev) +static int static_funnel_remove(struct platform_device *pdev) { funnel_remove(&pdev->dev); pm_runtime_disable(&pdev->dev); @@ -370,7 +370,7 @@ static int dynamic_funnel_probe(struct amba_device *adev, return funnel_probe(&adev->dev, &adev->res); } -static int __exit dynamic_funnel_remove(struct amba_device *adev) +static int dynamic_funnel_remove(struct amba_device *adev) { return funnel_remove(&adev->dev); } diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 65a29293b6cb9a..f5f654ea29946d 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -87,6 +87,7 @@ enum cs_mode { * struct cs_buffer - keep track of a recording session' specifics * @cur: index of the current buffer * @nr_pages: max number of pages granted to us + * @pid: PID this cs_buffer belongs to * @offset: offset within the current buffer * @data_size: how much we collected in this run * @snapshot: is this run in snapshot mode @@ -95,6 +96,7 @@ enum cs_mode { struct cs_buffers { unsigned int cur; unsigned int nr_pages; + pid_t pid; unsigned long offset; local_t data_size; bool snapshot; diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 62afdde0e5eaba..38008aca2c0f4b 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -291,7 +291,7 @@ static int replicator_probe(struct device *dev, struct resource *res) return ret; } -static int __exit replicator_remove(struct device *dev) +static int replicator_remove(struct device *dev) { struct replicator_drvdata 
*drvdata = dev_get_drvdata(dev); @@ -318,7 +318,7 @@ static int static_replicator_probe(struct platform_device *pdev) return ret; } -static int __exit static_replicator_remove(struct platform_device *pdev) +static int static_replicator_remove(struct platform_device *pdev) { replicator_remove(&pdev->dev); pm_runtime_disable(&pdev->dev); @@ -388,7 +388,7 @@ static int dynamic_replicator_probe(struct amba_device *adev, return replicator_probe(&adev->dev, &adev->res); } -static int __exit dynamic_replicator_remove(struct amba_device *adev) +static int dynamic_replicator_remove(struct amba_device *adev) { return replicator_remove(&adev->dev); } diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index b0ad912651a99e..587c1d7f252081 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -951,7 +951,7 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id) return ret; } -static int __exit stm_remove(struct amba_device *adev) +static int stm_remove(struct amba_device *adev) { struct stm_drvdata *drvdata = dev_get_drvdata(&adev->dev); diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index 5653e0945c74b6..8169dff5a9f6a7 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -559,7 +559,7 @@ static void tmc_shutdown(struct amba_device *adev) spin_unlock_irqrestore(&drvdata->spinlock, flags); } -static int __exit tmc_remove(struct amba_device *adev) +static int tmc_remove(struct amba_device *adev) { struct tmc_drvdata *drvdata = dev_get_drvdata(&adev->dev); diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 44402d413ebbdf..989d965f3d9011 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -227,6 
+227,7 @@ static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data) unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); struct perf_output_handle *handle = data; + struct cs_buffers *buf = etm_perf_sink_config(handle); spin_lock_irqsave(&drvdata->spinlock, flags); do { @@ -243,7 +244,7 @@ static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data) } /* Get a handle on the pid of the process to monitor */ - pid = task_pid_nr(handle->event->owner); + pid = buf->pid; if (drvdata->pid != -1 && drvdata->pid != pid) { ret = -EBUSY; @@ -399,6 +400,7 @@ static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, if (!buf) return NULL; + buf->pid = task_pid_nr(event->owner); buf->snapshot = overwrite; buf->nr_pages = nr_pages; buf->data_pages = pages; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 714f9e867e5f6a..3309b1344ffc01 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -217,6 +217,8 @@ static int tmc_pages_alloc(struct tmc_pages *tmc_pages, } else { page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!page) + goto err; } paddr = dma_map_page(real_dev, page, 0, PAGE_SIZE, dir); if (dma_mapping_error(real_dev, paddr)) @@ -1550,7 +1552,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev, /* Insert barrier packets at the beginning, if there was an overflow */ if (lost) - tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset); + tmc_etr_buf_insert_barrier_packet(etr_buf, offset); tmc_etr_sync_perf_buffer(etr_perf, offset, size); /* diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 566c57e035961e..5b35029461a0cb 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -173,7 +173,7 @@ static int tpiu_probe(struct 
amba_device *adev, const struct amba_id *id) return PTR_ERR(drvdata->csdev); } -static int __exit tpiu_remove(struct amba_device *adev) +static int tpiu_remove(struct amba_device *adev) { struct tpiu_drvdata *drvdata = dev_get_drvdata(&adev->dev); diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 52acd77438ede9..251e75c9ba9d0d 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -268,6 +268,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7aa6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Alder Lake-P */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x51a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), diff --git a/drivers/hwtracing/stm/heartbeat.c b/drivers/hwtracing/stm/heartbeat.c index 3e7df1c0477f75..81d7b21d31ec27 100644 --- a/drivers/hwtracing/stm/heartbeat.c +++ b/drivers/hwtracing/stm/heartbeat.c @@ -64,7 +64,7 @@ static void stm_heartbeat_unlink(struct stm_source_data *data) static int stm_heartbeat_init(void) { - int i, ret = -ENOMEM; + int i, ret; if (nr_devs < 0 || nr_devs > STM_HEARTBEAT_MAX) return -EINVAL; @@ -72,8 +72,10 @@ static int stm_heartbeat_init(void) for (i = 0; i < nr_devs; i++) { stm_heartbeat[i].data.name = kasprintf(GFP_KERNEL, "heartbeat.%d", i); - if (!stm_heartbeat[i].data.name) + if (!stm_heartbeat[i].data.name) { + ret = -ENOMEM; goto fail_unregister; + } stm_heartbeat[i].data.nr_chans = 1; stm_heartbeat[i].data.link = stm_heartbeat_link; diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index a49e0ed4a599d5..7e693dcbdd1961 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1012,6 +1012,7 @@ config I2C_SIRF config I2C_SPRD tristate "Spreadtrum I2C interface" depends on I2C=y && (ARCH_SPRD || COMPILE_TEST) + depends on COMMON_CLK help If you say yes to this option, support will be included for 
the Spreadtrum I2C interface. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index ae90713443fa61..877fe3733a42b2 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1449,7 +1449,7 @@ static int i801_add_mux(struct i801_priv *priv) /* Register GPIO descriptor lookup table */ lookup = devm_kzalloc(dev, - struct_size(lookup, table, mux_config->n_gpios), + struct_size(lookup, table, mux_config->n_gpios + 1), GFP_KERNEL); if (!lookup) return -ENOMEM; diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 33de99b7bc20c0..0818d3e5073477 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -38,6 +38,7 @@ #define I2C_IO_CONFIG_OPEN_DRAIN 0x0003 #define I2C_IO_CONFIG_PUSH_PULL 0x0000 #define I2C_SOFT_RST 0x0001 +#define I2C_HANDSHAKE_RST 0x0020 #define I2C_FIFO_ADDR_CLR 0x0001 #define I2C_DELAY_LEN 0x0002 #define I2C_TIME_CLR_VALUE 0x0000 @@ -45,6 +46,7 @@ #define I2C_WRRD_TRANAC_VALUE 0x0002 #define I2C_RD_TRANAC_VALUE 0x0001 #define I2C_SCL_MIS_COMP_VALUE 0x0000 +#define I2C_CHN_CLR_FLAG 0x0000 #define I2C_DMA_CON_TX 0x0000 #define I2C_DMA_CON_RX 0x0001 @@ -54,7 +56,9 @@ #define I2C_DMA_START_EN 0x0001 #define I2C_DMA_INT_FLAG_NONE 0x0000 #define I2C_DMA_CLR_FLAG 0x0000 +#define I2C_DMA_WARM_RST 0x0001 #define I2C_DMA_HARD_RST 0x0002 +#define I2C_DMA_HANDSHAKE_RST 0x0004 #define MAX_SAMPLE_CNT_DIV 8 #define MAX_STEP_CNT_DIV 64 @@ -475,11 +479,24 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) { u16 control_reg; - writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); - udelay(50); - writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); - - mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET); + if (i2c->dev_comp->dma_sync) { + writel(I2C_DMA_WARM_RST, i2c->pdmabase + OFFSET_RST); + udelay(10); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + udelay(10); + writel(I2C_DMA_HANDSHAKE_RST | I2C_DMA_HARD_RST, + i2c->pdmabase + OFFSET_RST); + 
mtk_i2c_writew(i2c, I2C_HANDSHAKE_RST | I2C_SOFT_RST, + OFFSET_SOFTRESET); + udelay(10); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_CHN_CLR_FLAG, OFFSET_SOFTRESET); + } else { + writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); + udelay(50); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET); + } /* Set ioconfig */ if (i2c->use_push_pull) diff --git a/drivers/i2c/busses/i2c-octeon-core.c b/drivers/i2c/busses/i2c-octeon-core.c index d9607905dc2f1d..845eda70b8cab5 100644 --- a/drivers/i2c/busses/i2c-octeon-core.c +++ b/drivers/i2c/busses/i2c-octeon-core.c @@ -347,7 +347,7 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target, if (result) return result; if (recv_len && i == 0) { - if (data[i] > I2C_SMBUS_BLOCK_MAX + 1) + if (data[i] > I2C_SMBUS_BLOCK_MAX) return -EPROTO; length += data[i]; } diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 8b4c35f47a70f0..dce75b85253c16 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -366,6 +366,7 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, geni_se_select_mode(se, GENI_SE_FIFO); writel_relaxed(len, se->base + SE_I2C_RX_TRANS_LEN); + geni_se_setup_m_cmd(se, I2C_READ, m_param); if (dma_buf && geni_se_rx_dma_prep(se, dma_buf, len, &rx_dma)) { geni_se_select_mode(se, GENI_SE_FIFO); @@ -373,8 +374,6 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, dma_buf = NULL; } - geni_se_setup_m_cmd(se, I2C_READ, m_param); - time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT); if (!time_left) geni_i2c_abort_xfer(gi2c); @@ -408,6 +407,7 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, geni_se_select_mode(se, GENI_SE_FIFO); writel_relaxed(len, se->base + SE_I2C_TX_TRANS_LEN); + geni_se_setup_m_cmd(se, I2C_WRITE, m_param); if (dma_buf && 
geni_se_tx_dma_prep(se, dma_buf, len, &tx_dma)) { geni_se_select_mode(se, GENI_SE_FIFO); @@ -415,8 +415,6 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, dma_buf = NULL; } - geni_se_setup_m_cmd(se, I2C_WRITE, m_param); - if (!dma_buf) /* Get FIFO IRQ */ writel_relaxed(1, se->base + SE_GENI_TX_WATERMARK_REG); diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 19cda6742423d9..2917fecf6c80d0 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -72,6 +72,8 @@ /* timeout (ms) for pm runtime autosuspend */ #define SPRD_I2C_PM_TIMEOUT 1000 +/* timeout (ms) for transfer message */ +#define I2C_XFER_TIMEOUT 1000 /* SPRD i2c data structure */ struct sprd_i2c { @@ -244,6 +246,7 @@ static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap, struct i2c_msg *msg, bool is_last_msg) { struct sprd_i2c *i2c_dev = i2c_adap->algo_data; + unsigned long time_left; i2c_dev->msg = msg; i2c_dev->buf = msg->buf; @@ -273,7 +276,10 @@ static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap, sprd_i2c_opt_start(i2c_dev); - wait_for_completion(&i2c_dev->complete); + time_left = wait_for_completion_timeout(&i2c_dev->complete, + msecs_to_jiffies(I2C_XFER_TIMEOUT)); + if (!time_left) + return -ETIMEDOUT; return i2c_dev->err; } diff --git a/drivers/i2c/busses/i2c-tegra-bpmp.c b/drivers/i2c/busses/i2c-tegra-bpmp.c index ec7a7e917eddb2..c0c7d01473f2ba 100644 --- a/drivers/i2c/busses/i2c-tegra-bpmp.c +++ b/drivers/i2c/busses/i2c-tegra-bpmp.c @@ -80,7 +80,7 @@ static int tegra_bpmp_xlate_flags(u16 flags, u16 *out) flags &= ~I2C_M_RECV_LEN; } - return (flags != 0) ? 
-EINVAL : 0; + return 0; } /** diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 6f08c0c3238d5a..8b113ae32dc713 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -326,6 +326,8 @@ static void i2c_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned int reg) /* read back register to make sure that register writes completed */ if (reg != I2C_TX_FIFO) readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); + else if (i2c_dev->is_vi) + readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, I2C_INT_STATUS)); } static u32 i2c_readl(struct tegra_i2c_dev *i2c_dev, unsigned int reg) @@ -339,6 +341,21 @@ static void i2c_writesl(struct tegra_i2c_dev *i2c_dev, void *data, writesl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg), data, len); } +static void i2c_writesl_vi(struct tegra_i2c_dev *i2c_dev, void *data, + unsigned int reg, unsigned int len) +{ + u32 *data32 = data; + + /* + * VI I2C controller has known hardware bug where writes get stuck + * when immediate multiple writes happen to TX_FIFO register. + * Recommended software work around is to read I2C register after + * each write to TX_FIFO register to flush out the data. 
+ */ + while (len--) + i2c_writel(i2c_dev, *data32++, reg); +} + static void i2c_readsl(struct tegra_i2c_dev *i2c_dev, void *data, unsigned int reg, unsigned int len) { @@ -533,7 +550,7 @@ static int tegra_i2c_poll_register(struct tegra_i2c_dev *i2c_dev, void __iomem *addr = i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg); u32 val; - if (!i2c_dev->atomic_mode) + if (!i2c_dev->atomic_mode && !in_irq()) return readl_relaxed_poll_timeout(addr, val, !(val & mask), delay_us, timeout_us); @@ -811,7 +828,10 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) i2c_dev->msg_buf_remaining = buf_remaining; i2c_dev->msg_buf = buf + words_to_transfer * BYTES_PER_FIFO_WORD; - i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); + if (i2c_dev->is_vi) + i2c_writesl_vi(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); + else + i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); buf += words_to_transfer * BYTES_PER_FIFO_WORD; } diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 1c6b78ad5ade46..b61bf53ec07afc 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -2537,7 +2537,7 @@ int i3c_master_register(struct i3c_master_controller *master, ret = i3c_master_bus_init(master); if (ret) - goto err_put_dev; + goto err_destroy_wq; ret = device_add(&master->dev); if (ret) @@ -2568,6 +2568,9 @@ int i3c_master_register(struct i3c_master_controller *master, err_cleanup_bus: i3c_master_bus_cleanup(master); +err_destroy_wq: + destroy_workqueue(master->wq); + err_put_dev: put_device(&master->dev); diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 2162bc80f09e02..013ad33fbbc81e 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -223,7 +223,6 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) sense_rq->rq_disk = rq->rq_disk; sense_rq->cmd_flags = REQ_OP_DRV_IN; ide_req(sense_rq)->type = ATA_PRIV_SENSE; - sense_rq->rq_flags |= RQF_PREEMPT; req->cmd[0] = GPCMD_REQUEST_SENSE; req->cmd[4] = cmd_len; diff 
--git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 1a53c7a752244b..4867b67b60d698 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -515,15 +515,10 @@ blk_status_t ide_issue_rq(ide_drive_t *drive, struct request *rq, * above to return us whatever is in the queue. Since we call * ide_do_request() ourselves, we end up taking requests while * the queue is blocked... - * - * We let requests forced at head of queue with ide-preempt - * though. I hope that doesn't happen too much, hopefully not - * unless the subdriver triggers such a thing in its own PM - * state machine. */ if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && ata_pm_request(rq) == 0 && - (rq->rq_flags & RQF_PREEMPT) == 0) { + (rq->rq_flags & RQF_PM) == 0) { /* there should be no pending command at this point */ ide_unlock_port(hwif); goto plug_device; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 192e6c65d34e7a..82ab308f1aafe0 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -77,7 +77,7 @@ int generic_ide_resume(struct device *dev) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PM); ide_req(rq)->type = ATA_PRIV_PM_RESUME; ide_req(rq)->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 91ae90514aff42..17e9ceb9c6c48e 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -295,7 +295,7 @@ config ASPEED_ADC config AT91_ADC tristate "Atmel AT91 ADC" depends on ARCH_AT91 || COMPILE_TEST - depends on INPUT && SYSFS + depends on INPUT && SYSFS && OF select IIO_BUFFER select IIO_TRIGGERED_BUFFER help diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index 86039e9ecaca1b..3a6f239d4acca3 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -57,7 +57,7 @@ EXPORT_SYMBOL_GPL(ad_sd_set_comm); int 
ad_sd_write_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, unsigned int size, unsigned int val) { - uint8_t *data = sigma_delta->data; + uint8_t *data = sigma_delta->tx_buf; struct spi_transfer t = { .tx_buf = data, .len = size + 1, @@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(ad_sd_write_reg); static int ad_sd_read_reg_raw(struct ad_sigma_delta *sigma_delta, unsigned int reg, unsigned int size, uint8_t *val) { - uint8_t *data = sigma_delta->data; + uint8_t *data = sigma_delta->tx_buf; int ret; struct spi_transfer t[] = { { @@ -146,22 +146,22 @@ int ad_sd_read_reg(struct ad_sigma_delta *sigma_delta, { int ret; - ret = ad_sd_read_reg_raw(sigma_delta, reg, size, sigma_delta->data); + ret = ad_sd_read_reg_raw(sigma_delta, reg, size, sigma_delta->rx_buf); if (ret < 0) goto out; switch (size) { case 4: - *val = get_unaligned_be32(sigma_delta->data); + *val = get_unaligned_be32(sigma_delta->rx_buf); break; case 3: - *val = get_unaligned_be24(&sigma_delta->data[0]); + *val = get_unaligned_be24(sigma_delta->rx_buf); break; case 2: - *val = get_unaligned_be16(sigma_delta->data); + *val = get_unaligned_be16(sigma_delta->rx_buf); break; case 1: - *val = sigma_delta->data[0]; + *val = sigma_delta->rx_buf[0]; break; default: ret = -EINVAL; @@ -395,11 +395,9 @@ static irqreturn_t ad_sd_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev); + uint8_t *data = sigma_delta->rx_buf; unsigned int reg_size; unsigned int data_reg; - uint8_t data[16]; - - memset(data, 0x00, 16); reg_size = indio_dev->channels[0].scan_type.realbits + indio_dev->channels[0].scan_type.shift; diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 9b2c548fae9571..0a793e7cd53ee1 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -1469,7 +1469,7 @@ static struct platform_driver at91_adc_driver = { .id_table = at91_adc_ids, .driver = { .name = 
DRIVER_NAME, - .of_match_table = of_match_ptr(at91_adc_dt_ids), + .of_match_table = at91_adc_dt_ids, .pm = &at91_adc_pm_ops, }, }; diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index 1f3d7d639d378e..12584f1631d888 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -462,7 +462,7 @@ static int rockchip_saradc_resume(struct device *dev) ret = clk_prepare_enable(info->clk); if (ret) - return ret; + clk_disable_unprepare(info->pclk); return ret; } diff --git a/drivers/iio/adc/ti-ads124s08.c b/drivers/iio/adc/ti-ads124s08.c index 4b4fbe33930ce3..b4a128b1918894 100644 --- a/drivers/iio/adc/ti-ads124s08.c +++ b/drivers/iio/adc/ti-ads124s08.c @@ -99,6 +99,14 @@ struct ads124s_private { struct gpio_desc *reset_gpio; struct spi_device *spi; struct mutex lock; + /* + * Used to correctly align data. + * Ensure timestamp is naturally aligned. + * Note that the full buffer length may not be needed if not + * all channels are enabled, as long as the alignment of the + * timestamp is maintained. 
+ */ + u32 buffer[ADS124S08_MAX_CHANNELS + sizeof(s64)/sizeof(u32)] __aligned(8); u8 data[5] ____cacheline_aligned; }; @@ -269,7 +277,6 @@ static irqreturn_t ads124s_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct ads124s_private *priv = iio_priv(indio_dev); - u32 buffer[ADS124S08_MAX_CHANNELS + sizeof(s64)/sizeof(u16)]; int scan_index, j = 0; int ret; @@ -284,7 +291,7 @@ static irqreturn_t ads124s_trigger_handler(int irq, void *p) if (ret) dev_err(&priv->spi->dev, "Start ADC conversions failed\n"); - buffer[j] = ads124s_read(indio_dev, scan_index); + priv->buffer[j] = ads124s_read(indio_dev, scan_index); ret = ads124s_write_cmd(indio_dev, ADS124S08_STOP_CONV); if (ret) dev_err(&priv->spi->dev, "Stop ADC conversions failed\n"); @@ -292,7 +299,7 @@ static irqreturn_t ads124s_trigger_handler(int irq, void *p) j++; } - iio_push_to_buffers_with_timestamp(indio_dev, buffer, + iio_push_to_buffers_with_timestamp(indio_dev, priv->buffer, pf->timestamp); iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index b11c8c47ba2aae..e946903b099367 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -397,16 +397,12 @@ static int tiadc_iio_buffered_hardware_setup(struct device *dev, ret = devm_request_threaded_irq(dev, irq, pollfunc_th, pollfunc_bh, flags, indio_dev->name, indio_dev); if (ret) - goto error_kfifo_free; + return ret; indio_dev->setup_ops = setup_ops; indio_dev->modes |= INDIO_BUFFER_SOFTWARE; return 0; - -error_kfifo_free: - iio_kfifo_free(indio_dev->buffer); - return ret; } static const char * const chan_name_ain[] = { diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index 0507283bd4c1de..2dbd2646e44e97 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -23,35 
+23,31 @@ * @sdata: Sensor data. * * returns: - * 0 - no new samples available - * 1 - new samples available - * negative - error or unknown + * false - no new samples available or read error + * true - new samples available */ -static int st_sensors_new_samples_available(struct iio_dev *indio_dev, - struct st_sensor_data *sdata) +static bool st_sensors_new_samples_available(struct iio_dev *indio_dev, + struct st_sensor_data *sdata) { int ret, status; /* How would I know if I can't check it? */ if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) - return -EINVAL; + return true; /* No scan mask, no interrupt */ if (!indio_dev->active_scan_mask) - return 0; + return false; ret = regmap_read(sdata->regmap, sdata->sensor_settings->drdy_irq.stat_drdy.addr, &status); if (ret < 0) { dev_err(sdata->dev, "error checking samples available\n"); - return ret; + return false; } - if (status & sdata->sensor_settings->drdy_irq.stat_drdy.mask) - return 1; - - return 0; + return !!(status & sdata->sensor_settings->drdy_irq.stat_drdy.mask); } /** @@ -180,9 +176,15 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, /* Tell the interrupt handler that we're dealing with edges */ if (irq_trig == IRQF_TRIGGER_FALLING || - irq_trig == IRQF_TRIGGER_RISING) + irq_trig == IRQF_TRIGGER_RISING) { + if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) { + dev_err(&indio_dev->dev, + "edge IRQ not supported w/o stat register.\n"); + err = -EOPNOTSUPP; + goto iio_trigger_free; + } sdata->edge_irq = true; - else + } else { /* * If we're not using edges (i.e. level interrupts) we * just mask off the IRQ, handle one interrupt, then @@ -190,6 +192,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, * interrupt handler top half again and start over. 
*/ irq_trig |= IRQF_ONESHOT; + } /* * If the interrupt pin is Open Drain, by definition this diff --git a/drivers/iio/dac/ad5504.c b/drivers/iio/dac/ad5504.c index 28921b62e64203..e9297c25d4ef63 100644 --- a/drivers/iio/dac/ad5504.c +++ b/drivers/iio/dac/ad5504.c @@ -187,9 +187,9 @@ static ssize_t ad5504_write_dac_powerdown(struct iio_dev *indio_dev, return ret; if (pwr_down) - st->pwr_down_mask |= (1 << chan->channel); - else st->pwr_down_mask &= ~(1 << chan->channel); + else + st->pwr_down_mask |= (1 << chan->channel); ret = ad5504_spi_write(st, AD5504_ADDR_CTRL, AD5504_DAC_PWRDWN_MODE(st->pwr_down_mode) | diff --git a/drivers/iio/imu/bmi160/bmi160.h b/drivers/iio/imu/bmi160/bmi160.h index a82e040bd10983..32c2ea2d71129a 100644 --- a/drivers/iio/imu/bmi160/bmi160.h +++ b/drivers/iio/imu/bmi160/bmi160.h @@ -10,6 +10,13 @@ struct bmi160_data { struct iio_trigger *trig; struct regulator_bulk_data supplies[2]; struct iio_mount_matrix orientation; + /* + * Ensure natural alignment for timestamp if present. + * Max length needed: 2 * 3 channels + 4 bytes padding + 8 byte ts. + * If fewer channels are enabled, less space may be needed, as + * long as the timestamp is still aligned to 8 bytes. 
+ */ + __le16 buf[12] __aligned(8); }; extern const struct regmap_config bmi160_regmap_config; diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index 222ebb26f01320..82f03a4dc47a7e 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c +++ b/drivers/iio/imu/bmi160/bmi160_core.c @@ -427,8 +427,6 @@ static irqreturn_t bmi160_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct bmi160_data *data = iio_priv(indio_dev); - __le16 buf[16]; - /* 3 sens x 3 axis x __le16 + 3 x __le16 pad + 4 x __le16 tstamp */ int i, ret, j = 0, base = BMI160_REG_DATA_MAGN_XOUT_L; __le16 sample; @@ -438,10 +436,10 @@ static irqreturn_t bmi160_trigger_handler(int irq, void *p) &sample, sizeof(sample)); if (ret) goto done; - buf[j++] = sample; + data->buf[j++] = sample; } - iio_push_to_buffers_with_timestamp(indio_dev, buf, pf->timestamp); + iio_push_to_buffers_with_timestamp(indio_dev, data->buf, pf->timestamp); done: iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 42f485634d044f..2ab1ac5a2412f9 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -2255,19 +2255,35 @@ st_lsm6dsx_report_motion_event(struct st_lsm6dsx_hw *hw) static irqreturn_t st_lsm6dsx_handler_thread(int irq, void *private) { struct st_lsm6dsx_hw *hw = private; + int fifo_len = 0, len; bool event; - int count; event = st_lsm6dsx_report_motion_event(hw); if (!hw->settings->fifo_ops.read_fifo) return event ? IRQ_HANDLED : IRQ_NONE; - mutex_lock(&hw->fifo_lock); - count = hw->settings->fifo_ops.read_fifo(hw); - mutex_unlock(&hw->fifo_lock); + /* + * If we are using edge IRQs, new samples can arrive while + * processing current interrupt since there are no hw + * guarantees the irq line stays "low" long enough to properly + * detect the new interrupt. 
In this case the new sample will + * be missed. + * Polling FIFO status register allow us to read new + * samples even if the interrupt arrives while processing + * previous data and the timeslot where the line is "low" is + * too short to be properly detected. + */ + do { + mutex_lock(&hw->fifo_lock); + len = hw->settings->fifo_ops.read_fifo(hw); + mutex_unlock(&hw->fifo_lock); + + if (len > 0) + fifo_len += len; + } while (len > 0); - return count || event ? IRQ_HANDLED : IRQ_NONE; + return fifo_len || event ? IRQ_HANDLED : IRQ_NONE; } static int st_lsm6dsx_irq_setup(struct st_lsm6dsx_hw *hw) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index a4f6bb96d4f425..276b609d791748 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -865,12 +865,12 @@ static int iio_buffer_update_demux(struct iio_dev *indio_dev, indio_dev->masklength, in_ind + 1); while (in_ind != out_ind) { - in_ind = find_next_bit(indio_dev->active_scan_mask, - indio_dev->masklength, - in_ind + 1); length = iio_storage_bytes_for_si(indio_dev, in_ind); /* Make sure we are aligned */ in_loc = roundup(in_loc, length) + length; + in_ind = find_next_bit(indio_dev->active_scan_mask, + indio_dev->masklength, + in_ind + 1); } length = iio_storage_bytes_for_si(indio_dev, in_ind); out_loc = roundup(out_loc, length); diff --git a/drivers/iio/light/rpr0521.c b/drivers/iio/light/rpr0521.c index aa2972b0483344..31224a33bade37 100644 --- a/drivers/iio/light/rpr0521.c +++ b/drivers/iio/light/rpr0521.c @@ -194,6 +194,17 @@ struct rpr0521_data { bool pxs_need_dis; struct regmap *regmap; + + /* + * Ensure correct naturally aligned timestamp. 
+ * Note that the read will put garbage data into + * the padding but this should not be a problem + */ + struct { + __le16 channels[3]; + u8 garbage; + s64 ts __aligned(8); + } scan; }; static IIO_CONST_ATTR(in_intensity_scale_available, RPR0521_ALS_SCALE_AVAIL); @@ -449,8 +460,6 @@ static irqreturn_t rpr0521_trigger_consumer_handler(int irq, void *p) struct rpr0521_data *data = iio_priv(indio_dev); int err; - u8 buffer[16]; /* 3 16-bit channels + padding + ts */ - /* Use irq timestamp when reasonable. */ if (iio_trigger_using_own(indio_dev) && data->irq_timestamp) { pf->timestamp = data->irq_timestamp; @@ -461,11 +470,11 @@ static irqreturn_t rpr0521_trigger_consumer_handler(int irq, void *p) pf->timestamp = iio_get_time_ns(indio_dev); err = regmap_bulk_read(data->regmap, RPR0521_REG_PXS_DATA, - &buffer, + data->scan.channels, (3 * 2) + 1); /* 3 * 16-bit + (discarded) int clear reg. */ if (!err) iio_push_to_buffers_with_timestamp(indio_dev, - buffer, pf->timestamp); + &data->scan, pf->timestamp); else dev_err(&data->client->dev, "Trigger consumer can't read from sensor.\n"); diff --git a/drivers/iio/light/st_uvis25.h b/drivers/iio/light/st_uvis25.h index 78bc56aad1299f..283086887caf5d 100644 --- a/drivers/iio/light/st_uvis25.h +++ b/drivers/iio/light/st_uvis25.h @@ -27,6 +27,11 @@ struct st_uvis25_hw { struct iio_trigger *trig; bool enabled; int irq; + /* Ensure timestamp is naturally aligned */ + struct { + u8 chan; + s64 ts __aligned(8); + } scan; }; extern const struct dev_pm_ops st_uvis25_pm_ops; diff --git a/drivers/iio/light/st_uvis25_core.c b/drivers/iio/light/st_uvis25_core.c index a18a82e6bbf5d6..1055594b22764e 100644 --- a/drivers/iio/light/st_uvis25_core.c +++ b/drivers/iio/light/st_uvis25_core.c @@ -232,17 +232,19 @@ static const struct iio_buffer_setup_ops st_uvis25_buffer_ops = { static irqreturn_t st_uvis25_buffer_handler_thread(int irq, void *p) { - u8 buffer[ALIGN(sizeof(u8), sizeof(s64)) + sizeof(s64)]; struct iio_poll_func *pf = p; struct 
iio_dev *iio_dev = pf->indio_dev; struct st_uvis25_hw *hw = iio_priv(iio_dev); + unsigned int val; int err; - err = regmap_read(hw->regmap, ST_UVIS25_REG_OUT_ADDR, (int *)buffer); + err = regmap_read(hw->regmap, ST_UVIS25_REG_OUT_ADDR, &val); if (err < 0) goto out; - iio_push_to_buffers_with_timestamp(iio_dev, buffer, + hw->scan.chan = val; + + iio_push_to_buffers_with_timestamp(iio_dev, &hw->scan, iio_get_time_ns(iio_dev)); out: diff --git a/drivers/iio/magnetometer/mag3110.c b/drivers/iio/magnetometer/mag3110.c index 838b13c8bb3dbd..c96415a1aeaddb 100644 --- a/drivers/iio/magnetometer/mag3110.c +++ b/drivers/iio/magnetometer/mag3110.c @@ -56,6 +56,12 @@ struct mag3110_data { int sleep_val; struct regulator *vdd_reg; struct regulator *vddio_reg; + /* Ensure natural alignment of timestamp */ + struct { + __be16 channels[3]; + u8 temperature; + s64 ts __aligned(8); + } scan; }; static int mag3110_request(struct mag3110_data *data) @@ -387,10 +393,9 @@ static irqreturn_t mag3110_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct mag3110_data *data = iio_priv(indio_dev); - u8 buffer[16]; /* 3 16-bit channels + 1 byte temp + padding + ts */ int ret; - ret = mag3110_read(data, (__be16 *) buffer); + ret = mag3110_read(data, data->scan.channels); if (ret < 0) goto done; @@ -399,10 +404,10 @@ static irqreturn_t mag3110_trigger_handler(int irq, void *p) MAG3110_DIE_TEMP); if (ret < 0) goto done; - buffer[6] = ret; + data->scan.temperature = ret; } - iio_push_to_buffers_with_timestamp(indio_dev, buffer, + iio_push_to_buffers_with_timestamp(indio_dev, &data->scan, iio_get_time_ns(indio_dev)); done: diff --git a/drivers/iio/pressure/mpl3115.c b/drivers/iio/pressure/mpl3115.c index ccdb0b70e48caf..1eb9e7b29e0500 100644 --- a/drivers/iio/pressure/mpl3115.c +++ b/drivers/iio/pressure/mpl3115.c @@ -144,7 +144,14 @@ static irqreturn_t mpl3115_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev 
*indio_dev = pf->indio_dev; struct mpl3115_data *data = iio_priv(indio_dev); - u8 buffer[16]; /* 32-bit channel + 16-bit channel + padding + ts */ + /* + * 32-bit channel + 16-bit channel + padding + ts + * Note that it is possible for only one of the first 2 + * channels to be enabled. If that happens, the first element + * of the buffer may be either 16 or 32-bits. As such we cannot + * use a simple structure definition to express this data layout. + */ + u8 buffer[16] __aligned(8); int ret, pos = 0; mutex_lock(&data->lock); diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c index 503fe54a0bb937..608ccb1d8bc82f 100644 --- a/drivers/iio/temperature/mlx90632.c +++ b/drivers/iio/temperature/mlx90632.c @@ -248,6 +248,12 @@ static int mlx90632_set_meas_type(struct regmap *regmap, u8 type) if (ret < 0) return ret; + /* + * Give the mlx90632 some time to reset properly before sending a new I2C command + * if this is not done, the following I2C command(s) will not be accepted. 
+ */ + usleep_range(150, 200); + ret = regmap_write_bits(regmap, MLX90632_REG_CONTROL, (MLX90632_CFG_MTYP_MASK | MLX90632_CFG_PWR_MASK), (MLX90632_MTYP_STATUS(type) | MLX90632_PWR_STATUS_HALT)); diff --git a/drivers/iio/trigger/iio-trig-hrtimer.c b/drivers/iio/trigger/iio-trig-hrtimer.c index f59bf8d585866e..410de837d0417c 100644 --- a/drivers/iio/trigger/iio-trig-hrtimer.c +++ b/drivers/iio/trigger/iio-trig-hrtimer.c @@ -102,7 +102,7 @@ static int iio_trig_hrtimer_set_state(struct iio_trigger *trig, bool state) if (state) hrtimer_start(&trig_info->timer, trig_info->period, - HRTIMER_MODE_REL); + HRTIMER_MODE_REL_HARD); else hrtimer_cancel(&trig_info->timer); @@ -132,7 +132,7 @@ static struct iio_sw_trigger *iio_trig_hrtimer_probe(const char *name) trig_info->swt.trigger->ops = &iio_hrtimer_trigger_ops; trig_info->swt.trigger->dev.groups = iio_hrtimer_attr_groups; - hrtimer_init(&trig_info->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&trig_info->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); trig_info->timer.function = iio_hrtimer_trig_handler; trig_info->sampling_frequency = HRTIMER_DEFAULT_SAMPLING_FREQUENCY; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a77750b8954db0..c51b84b2d2f375 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -477,6 +477,10 @@ static void cma_release_dev(struct rdma_id_private *id_priv) list_del(&id_priv->list); cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; + if (id_priv->id.route.addr.dev_addr.sgid_attr) { + rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); + id_priv->id.route.addr.dev_addr.sgid_attr = NULL; + } mutex_unlock(&lock); } @@ -1861,9 +1865,6 @@ static void _destroy_id(struct rdma_id_private *id_priv, kfree(id_priv->id.route.path_rec); - if (id_priv->id.route.addr.dev_addr.sgid_attr) - rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); - put_net(id_priv->id.route.addr.dev_addr.net); 
rdma_restrack_del(&id_priv->res); kfree(id_priv); @@ -2495,8 +2496,9 @@ static int cma_listen_handler(struct rdma_cm_id *id, return id_priv->id.event_handler(id, event); } -static void cma_listen_on_dev(struct rdma_id_private *id_priv, - struct cma_device *cma_dev) +static int cma_listen_on_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev, + struct rdma_id_private **to_destroy) { struct rdma_id_private *dev_id_priv; struct net *net = id_priv->id.route.addr.dev_addr.net; @@ -2504,21 +2506,21 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, lockdep_assert_held(&lock); + *to_destroy = NULL; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) - return; + return 0; dev_id_priv = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type, id_priv); if (IS_ERR(dev_id_priv)) - return; + return PTR_ERR(dev_id_priv); dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); _cma_attach_to_dev(dev_id_priv, cma_dev); - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); cma_id_get(id_priv); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; @@ -2527,19 +2529,42 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) - dev_warn(&cma_dev->device->dev, - "RDMA CMA: cma_listen_on_dev, error %d\n", ret); + goto err_listen; + list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + return 0; +err_listen: + /* Caller must destroy this after releasing lock */ + *to_destroy = dev_id_priv; + dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret); + return ret; } -static void cma_listen_on_all(struct rdma_id_private *id_priv) +static int cma_listen_on_all(struct rdma_id_private *id_priv) { + struct rdma_id_private *to_destroy; struct cma_device *cma_dev; + int ret; mutex_lock(&lock); 
list_add_tail(&id_priv->list, &listen_any_list); - list_for_each_entry(cma_dev, &dev_list, list) - cma_listen_on_dev(id_priv, cma_dev); + list_for_each_entry(cma_dev, &dev_list, list) { + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); + if (ret) { + /* Prevent racing with cma_process_remove() */ + if (to_destroy) + list_del_init(&to_destroy->list); + goto err_listen; + } + } mutex_unlock(&lock); + return 0; + +err_listen: + list_del(&id_priv->list); + mutex_unlock(&lock); + if (to_destroy) + rdma_destroy_id(&to_destroy->id); + return ret; } void rdma_set_service_type(struct rdma_cm_id *id, int tos) @@ -3692,8 +3717,11 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) ret = -ENOSYS; goto err; } - } else - cma_listen_on_all(id_priv); + } else { + ret = cma_listen_on_all(id_priv); + if (ret) + goto err; + } return 0; err: @@ -4773,69 +4801,6 @@ static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; -static int cma_add_one(struct ib_device *device) -{ - struct cma_device *cma_dev; - struct rdma_id_private *id_priv; - unsigned int i; - unsigned long supported_gids = 0; - int ret; - - cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); - if (!cma_dev) - return -ENOMEM; - - cma_dev->device = device; - cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_gid_type), - GFP_KERNEL); - if (!cma_dev->default_gid_type) { - ret = -ENOMEM; - goto free_cma_dev; - } - - cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_roce_tos), - GFP_KERNEL); - if (!cma_dev->default_roce_tos) { - ret = -ENOMEM; - goto free_gid_type; - } - - rdma_for_each_port (device, i) { - supported_gids = roce_gid_type_mask_support(device, i); - WARN_ON(!supported_gids); - if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) - cma_dev->default_gid_type[i - rdma_start_port(device)] = - CMA_PREFERRED_ROCE_GID_TYPE; - else - cma_dev->default_gid_type[i - rdma_start_port(device)] = - 
find_first_bit(&supported_gids, BITS_PER_LONG); - cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; - } - - init_completion(&cma_dev->comp); - refcount_set(&cma_dev->refcount, 1); - INIT_LIST_HEAD(&cma_dev->id_list); - ib_set_client_data(device, &cma_client, cma_dev); - - mutex_lock(&lock); - list_add_tail(&cma_dev->list, &dev_list); - list_for_each_entry(id_priv, &listen_any_list, list) - cma_listen_on_dev(id_priv, cma_dev); - mutex_unlock(&lock); - - trace_cm_add_one(device); - return 0; - -free_gid_type: - kfree(cma_dev->default_gid_type); - -free_cma_dev: - kfree(cma_dev); - return ret; -} - static void cma_send_device_removal_put(struct rdma_id_private *id_priv) { struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; @@ -4898,6 +4863,80 @@ static void cma_process_remove(struct cma_device *cma_dev) wait_for_completion(&cma_dev->comp); } +static int cma_add_one(struct ib_device *device) +{ + struct rdma_id_private *to_destroy; + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + unsigned int i; + unsigned long supported_gids = 0; + int ret; + + cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL); + if (!cma_dev) + return -ENOMEM; + + cma_dev->device = device; + cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_gid_type), + GFP_KERNEL); + if (!cma_dev->default_gid_type) { + ret = -ENOMEM; + goto free_cma_dev; + } + + cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_roce_tos), + GFP_KERNEL); + if (!cma_dev->default_roce_tos) { + ret = -ENOMEM; + goto free_gid_type; + } + + rdma_for_each_port (device, i) { + supported_gids = roce_gid_type_mask_support(device, i); + WARN_ON(!supported_gids); + if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) + cma_dev->default_gid_type[i - rdma_start_port(device)] = + CMA_PREFERRED_ROCE_GID_TYPE; + else + cma_dev->default_gid_type[i - rdma_start_port(device)] = + find_first_bit(&supported_gids, BITS_PER_LONG); + 
cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; + } + + init_completion(&cma_dev->comp); + refcount_set(&cma_dev->refcount, 1); + INIT_LIST_HEAD(&cma_dev->id_list); + ib_set_client_data(device, &cma_client, cma_dev); + + mutex_lock(&lock); + list_add_tail(&cma_dev->list, &dev_list); + list_for_each_entry(id_priv, &listen_any_list, list) { + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); + if (ret) + goto free_listen; + } + mutex_unlock(&lock); + + trace_cm_add_one(device); + return 0; + +free_listen: + list_del(&cma_dev->list); + mutex_unlock(&lock); + + /* cma_process_remove() will delete to_destroy */ + cma_process_remove(cma_dev); + kfree(cma_dev->default_roce_tos); +free_gid_type: + kfree(cma_dev->default_gid_type); + +free_cma_dev: + kfree(cma_dev); + return ret; +} + static void cma_remove_one(struct ib_device *device, void *client_data) { struct cma_device *cma_dev = client_data; diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7ec4af2ed87abb..35d1ec1095f9c3 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -131,8 +131,10 @@ static ssize_t default_roce_mode_store(struct config_item *item, return ret; gid_type = ib_cache_gid_parse_type_str(buf); - if (gid_type < 0) + if (gid_type < 0) { + cma_configfs_params_put(cma_dev); return -EINVAL; + } ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a3b1fc84cdcab9..76b9c436edcd2f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1177,25 +1177,6 @@ static int assign_name(struct ib_device *device, const char *name) return ret; } -static void setup_dma_device(struct ib_device *device, - struct device *dma_device) -{ - /* - * If the caller does not provide a DMA capable device then the IB - * device will be used. 
In this case the caller should fully setup the - * ibdev for DMA. This usually means using dma_virt_ops. - */ -#ifdef CONFIG_DMA_VIRT_OPS - if (!dma_device) { - device->dev.dma_ops = &dma_virt_ops; - dma_device = &device->dev; - } -#endif - WARN_ON(!dma_device); - device->dma_device = dma_device; - WARN_ON(!device->dma_device->dma_parms); -} - /* * setup_device() allocates memory and sets up data that requires calling the * device ops, this is the only reason these actions are not done during @@ -1341,7 +1322,14 @@ int ib_register_device(struct ib_device *device, const char *name, if (ret) return ret; - setup_dma_device(device, dma_device); + /* + * If the caller does not provide a DMA capable device then the IB core + * will set up ib_sge and scatterlist structures that stash the kernel + * virtual address into the address field. + */ + WARN_ON(dma_device && !dma_device->dma_parms); + device->dma_device = dma_device; + ret = setup_device(device); if (ret) return ret; @@ -1374,9 +1362,6 @@ int ib_register_device(struct ib_device *device, const char *name, } ret = enable_device_and_get(device); - dev_set_uevent_suppress(&device->dev, false); - /* Mark for userspace that device is ready */ - kobject_uevent(&device->dev.kobj, KOBJ_ADD); if (ret) { void (*dealloc_fn)(struct ib_device *); @@ -1396,8 +1381,12 @@ int ib_register_device(struct ib_device *device, const char *name, ib_device_put(device); __ib_unregister_device(device); device->ops.dealloc_driver = dealloc_fn; + dev_set_uevent_suppress(&device->dev, false); return ret; } + dev_set_uevent_suppress(&device->dev, false); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); ib_device_put(device); return 0; @@ -2675,6 +2664,21 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) } EXPORT_SYMBOL(ib_set_device_ops); +#ifdef CONFIG_INFINIBAND_VIRT_DMA +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) +{ + struct 
scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + sg_dma_address(s) = (uintptr_t)sg_virt(s); + sg_dma_len(s) = s->length; + } + return nents; +} +EXPORT_SYMBOL(ib_dma_virt_map_sg); +#endif /* CONFIG_INFINIBAND_VIRT_DMA */ + static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 4aeeaaed0f17dd..bbbbec5b15939f 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -244,6 +244,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) } else { ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b, &rt->next_id, GFP_KERNEL); + ret = (ret < 0) ? ret : 0; } if (!ret) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 13f43ab7220b05..a96030b784eb21 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -285,8 +285,11 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir) { - if (is_pci_p2pdma_page(sg_page(sg))) + if (is_pci_p2pdma_page(sg_page(sg))) { + if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) + return 0; return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + } return ib_dma_map_sg(dev, sg, sg_cnt, dir); } diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ffe2563ad34565..2cc785c1970b4c 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -95,8 +95,6 @@ struct ucma_context { u64 uid; struct list_head list; - /* sync between removal event and id destroy, protected by file mut */ - int destroying; struct work_struct close_work; }; @@ -122,7 +120,7 @@ static DEFINE_XARRAY_ALLOC(ctx_table); static DEFINE_XARRAY_ALLOC(multicast_table); static const struct file_operations ucma_fops; -static int 
__destroy_id(struct ucma_context *ctx); +static int ucma_destroy_private_ctx(struct ucma_context *ctx); static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) @@ -179,19 +177,14 @@ static void ucma_close_id(struct work_struct *work) /* once all inflight tasks are finished, we close all underlying * resources. The context is still alive till its explicit destryoing - * by its creator. + * by its creator. This puts back the xarray's reference. */ ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); - /* - * At this point ctx->ref is zero so the only place the ctx can be is in - * a uevent or in __destroy_id(). Since the former doesn't touch - * ctx->cm_id and the latter sync cancels this, there is no races with - * this store. - */ + /* Reading the cm_id without holding a positive ref is not allowed */ ctx->cm_id = NULL; } @@ -204,7 +197,6 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) return NULL; INIT_WORK(&ctx->close_work, ucma_close_id); - refcount_set(&ctx->ref, 1); init_completion(&ctx->comp); /* So list_del() will work if we don't do ucma_finish_ctx() */ INIT_LIST_HEAD(&ctx->list); @@ -218,6 +210,13 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) return ctx; } +static void ucma_set_ctx_cm_id(struct ucma_context *ctx, + struct rdma_cm_id *cm_id) +{ + refcount_set(&ctx->ref, 1); + ctx->cm_id = cm_id; +} + static void ucma_finish_ctx(struct ucma_context *ctx) { lockdep_assert_held(&ctx->file->mut); @@ -303,7 +302,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, ctx = ucma_alloc_ctx(listen_ctx->file); if (!ctx) goto err_backlog; - ctx->cm_id = cm_id; + ucma_set_ctx_cm_id(ctx, cm_id); uevent = ucma_create_uevent(listen_ctx, event); if (!uevent) @@ -321,8 +320,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, return 0; err_alloc: - xa_erase(&ctx_table, ctx->id); - 
kfree(ctx); + ucma_destroy_private_ctx(ctx); err_backlog: atomic_inc(&listen_ctx->backlog); /* Returning error causes the new ID to be destroyed */ @@ -356,8 +354,12 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, wake_up_interruptible(&ctx->file->poll_wait); } - if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying) - queue_work(system_unbound_wq, &ctx->close_work); + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { + xa_lock(&ctx_table); + if (xa_load(&ctx_table, ctx->id) == ctx) + queue_work(system_unbound_wq, &ctx->close_work); + xa_unlock(&ctx_table); + } return 0; } @@ -461,13 +463,12 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, ret = PTR_ERR(cm_id); goto err1; } - ctx->cm_id = cm_id; + ucma_set_ctx_cm_id(ctx, cm_id); resp.id = ctx->id; if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { - xa_erase(&ctx_table, ctx->id); - __destroy_id(ctx); + ucma_destroy_private_ctx(ctx); return -EFAULT; } @@ -477,8 +478,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return 0; err1: - xa_erase(&ctx_table, ctx->id); - kfree(ctx); + ucma_destroy_private_ctx(ctx); return ret; } @@ -516,68 +516,73 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) rdma_unlock_handler(mc->ctx->cm_id); } -/* - * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At - * this point, no new events will be reported from the hardware. However, we - * still need to cleanup the UCMA context for this ID. Specifically, there - * might be events that have not yet been consumed by the user space software. - * mutex. After that we release them as needed. - */ -static int ucma_free_ctx(struct ucma_context *ctx) +static int ucma_cleanup_ctx_events(struct ucma_context *ctx) { int events_reported; struct ucma_event *uevent, *tmp; LIST_HEAD(list); - ucma_cleanup_multicast(ctx); - - /* Cleanup events not yet reported to the user. 
*/ + /* Cleanup events not yet reported to the user.*/ mutex_lock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { - if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx) + if (uevent->ctx != ctx) + continue; + + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && + xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id, + uevent->conn_req_ctx, XA_ZERO_ENTRY, + GFP_KERNEL) == uevent->conn_req_ctx) { list_move_tail(&uevent->list, &list); + continue; + } + list_del(&uevent->list); + kfree(uevent); } list_del(&ctx->list); events_reported = ctx->events_reported; mutex_unlock(&ctx->file->mut); /* - * If this was a listening ID then any connections spawned from it - * that have not been delivered to userspace are cleaned up too. - * Must be done outside any locks. + * If this was a listening ID then any connections spawned from it that + * have not been delivered to userspace are cleaned up too. Must be done + * outside any locks. */ list_for_each_entry_safe(uevent, tmp, &list, list) { - list_del(&uevent->list); - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && - uevent->conn_req_ctx != ctx) - __destroy_id(uevent->conn_req_ctx); + ucma_destroy_private_ctx(uevent->conn_req_ctx); kfree(uevent); } - - mutex_destroy(&ctx->mutex); - kfree(ctx); return events_reported; } -static int __destroy_id(struct ucma_context *ctx) +/* + * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie + * the ctx is not public to the user). This either because: + * - ucma_finish_ctx() hasn't been called + * - xa_cmpxchg() succeed to remove the entry (only one thread can succeed) + */ +static int ucma_destroy_private_ctx(struct ucma_context *ctx) { + int events_reported; + /* - * If the refcount is already 0 then ucma_close_id() has already - * destroyed the cm_id, otherwise holding the refcount keeps cm_id - * valid. Prevent queue_work() from being called. + * Destroy the underlying cm_id. 
New work queuing is prevented now by + * the removal from the xarray. Once the work is cancled ref will either + * be 0 because the work ran to completion and consumed the ref from the + * xarray, or it will be positive because we still have the ref from the + * xarray. This can also be 0 in cases where cm_id was never set */ - if (refcount_inc_not_zero(&ctx->ref)) { - rdma_lock_handler(ctx->cm_id); - ctx->destroying = 1; - rdma_unlock_handler(ctx->cm_id); - ucma_put_ctx(ctx); - } - cancel_work_sync(&ctx->close_work); - /* At this point it's guaranteed that there is no inflight closing task */ - if (ctx->cm_id) + if (refcount_read(&ctx->ref)) ucma_close_id(&ctx->close_work); - return ucma_free_ctx(ctx); + + events_reported = ucma_cleanup_ctx_events(ctx); + ucma_cleanup_multicast(ctx); + + WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL, + GFP_KERNEL) != NULL); + mutex_destroy(&ctx->mutex); + kfree(ctx); + return events_reported; } static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, @@ -596,14 +601,17 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, xa_lock(&ctx_table); ctx = _ucma_find_context(cmd.id, file); - if (!IS_ERR(ctx)) - __xa_erase(&ctx_table, ctx->id); + if (!IS_ERR(ctx)) { + if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, + GFP_KERNEL) != ctx) + ctx = ERR_PTR(-ENOENT); + } xa_unlock(&ctx_table); if (IS_ERR(ctx)) return PTR_ERR(ctx); - resp.events_reported = __destroy_id(ctx); + resp.events_reported = ucma_destroy_private_ctx(ctx); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -1777,15 +1785,16 @@ static int ucma_close(struct inode *inode, struct file *filp) * prevented by this being a FD release function. The list_add_tail() in * ucma_connect_event_handler() can run concurrently, however it only * adds to the list *after* a listening ID. 
By only reading the first of - * the list, and relying on __destroy_id() to block + * the list, and relying on ucma_destroy_private_ctx() to block * ucma_connect_event_handler(), no additional locking is needed. */ while (!list_empty(&file->ctx_list)) { struct ucma_context *ctx = list_first_entry( &file->ctx_list, struct ucma_context, list); - xa_erase(&ctx_table, ctx->id); - __destroy_id(ctx); + WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, + GFP_KERNEL) != ctx); + ucma_destroy_private_ctx(ctx); } kfree(file); return 0; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index e9fecbdf391bcc..5157ae29a4460b 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -126,7 +126,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, */ if (mask) pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0); - return rounddown_pow_of_two(pgsz_bitmap); + return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0; } EXPORT_SYMBOL(ib_umem_find_best_pgsz); diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 302f898c5833fc..9ec6971056fa85 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -317,8 +317,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( struct ib_device *ib_dev; size_t user_entry_size; ssize_t num_entries; - size_t max_entries; - size_t num_bytes; + int max_entries; u32 flags; int ret; @@ -336,19 +335,16 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, user_entry_size); if (max_entries <= 0) - return -EINVAL; + return max_entries ?: -EINVAL; ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return PTR_ERR(ucontext); ib_dev = ucontext->device; - if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes)) - return -EINVAL; - - entries = uverbs_zalloc(attrs, 
num_bytes); - if (!entries) - return -ENOMEM; + entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries)); + if (IS_ERR(entries)) + return PTR_ERR(entries); num_entries = rdma_query_gid_table(ib_dev, entries, max_entries); if (num_entries < 0) diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 9b22bb553e8b32..dc58564417292a 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -33,6 +33,7 @@ #include "rdma_core.h" #include "uverbs.h" #include +#include "restrack.h" static int uverbs_free_mr(struct ib_uobject *uobject, enum rdma_remove_reason why, @@ -134,6 +135,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( atomic_inc(&pd->usecnt); atomic_inc(&dm->usecnt); + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_set_name(&mr->res, NULL); + rdma_restrack_add(&mr->res); uobj->object = mr; uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 740f8454b6b461..3d895cc41c3ad9 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1698,8 +1698,10 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, slave = rdma_lag_get_ah_roce_slave(qp->device, &attr->ah_attr, GFP_KERNEL); - if (IS_ERR(slave)) + if (IS_ERR(slave)) { + ret = PTR_ERR(slave); goto out_av; + } attr->xmit_slave = slave; } } diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index cf3db96283976b..266de55f57192d 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1657,8 +1657,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.max_wqe = entries; srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; - srq->qplib_srq.wqe_size = - bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); + /* 128 byte wqe size for SRQ . 
So use max sges */ + srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; @@ -2078,6 +2078,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, goto out; } qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); + qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state); qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0; qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access); qp_attr->pkey_index = qplib_qp->pkey_index; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 28349ed5088540..d6cfefc269ee3a 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1008,6 +1008,9 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (attr->flags) return -EINVAL; + if (entries < 1 || entries > ibdev->attrs.max_cqe) + return -EINVAL; + if (vector >= rhp->rdev.lldi.nciq) return -EINVAL; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index f20379e4e2ec22..5df4bb52bb10f6 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2471,7 +2471,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; - init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges; + init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges; init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE; init_attr->sq_sig_type = qhp->sq_sig_all ? 
IB_SIGNAL_ALL_WR : 0; return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 75b06db60f7c2a..174b19e397124f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -31,14 +31,11 @@ */ #include +#include #include #include #include "hns_roce_device.h" -#define HNS_ROCE_PORT_NUM_SHIFT 24 -#define HNS_ROCE_VLAN_SL_BIT_MASK 7 -#define HNS_ROCE_VLAN_SL_SHIFT 13 - static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) { u32 fl = ah_attr->grh.flow_label; @@ -58,47 +55,38 @@ static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); - const struct ib_gid_attr *gid_attr; - struct device *dev = hr_dev->dev; - struct hns_roce_ah *ah = to_hr_ah(ibah); struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - u16 vlan_id = 0xffff; - bool vlan_en = false; - int ret; - - gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); - if (ret) - return ret; - - /* Get mac address */ - memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - - if (vlan_id < VLAN_N_VID) { - vlan_en = true; - vlan_id |= (rdma_ah_get_sl(ah_attr) & - HNS_ROCE_VLAN_SL_BIT_MASK) << - HNS_ROCE_VLAN_SL_SHIFT; - } + struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); + struct hns_roce_ah *ah = to_hr_ah(ibah); + int ret = 0; ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan_id = vlan_id; - ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, - ah->av.vlan_id); if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; - memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); - ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.hop_limit = 
grh->hop_limit; ah->av.flowlabel = grh->flow_label; ah->av.udp_sport = get_ah_udp_sport(ah_attr); + ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.tclass = get_tclass(grh); - return 0; + memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); + memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); + + /* HIP08 needs to record vlan info in Address Vector */ + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { + ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, + &ah->av.vlan_id, NULL); + if (ret) + return ret; + + ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID; + } + + return ret; } int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 809b22aa5056c4..da346129f6e9ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -274,7 +274,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (udata) { ret = ib_copy_from_udata(&ucmd, udata, - min(sizeof(ucmd), udata->inlen)); + min(udata->inlen, sizeof(ucmd))); if (ret) { ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", ret); @@ -313,7 +313,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (udata) { resp.cqn = hr_cq->cqn; - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto err_cqc; } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6d2acff69f982f..1ea87f92aabbec 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -547,7 +547,7 @@ struct hns_roce_av { u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; u16 vlan_id; - bool vlan_en; + u8 vlan_en; }; struct hns_roce_ah { @@ -1132,6 +1132,14 @@ static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) return 
ilog2(to_hr_hem_entries_count(count, buf_shift)); } +#define DSCP_SHIFT 2 + +static inline u8 get_tclass(const struct ib_global_route *grh) +{ + return grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP ? + grh->traffic_class >> DSCP_SHIFT : grh->traffic_class; +} + int hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 7487cf3d2c37aa..66f9f036ef9465 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1017,7 +1017,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { - if (hr_dev->caps.srqc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); @@ -1027,7 +1027,7 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) if (hr_dev->caps.cqc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); - if (hr_dev->caps.sccc_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); if (hr_dev->caps.trrl_entry_sz) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0468028ffe390d..5c29c7d8c50e6d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -214,25 +214,20 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, return 0; } -static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, - unsigned int *sge_ind, unsigned int valid_num_sge) +static void set_extend_sge(struct hns_roce_qp *qp, struct ib_sge *sge, + unsigned int *sge_ind, unsigned int 
cnt) { struct hns_roce_v2_wqe_data_seg *dseg; - unsigned int cnt = valid_num_sge; - struct ib_sge *sge = wr->sg_list; unsigned int idx = *sge_ind; - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - cnt -= HNS_ROCE_SGE_IN_WQE; - sge += HNS_ROCE_SGE_IN_WQE; - } - while (cnt > 0) { dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); - set_data_seg_v2(dseg, sge); - idx++; + if (likely(sge->length)) { + set_data_seg_v2(dseg, sge); + idx++; + cnt--; + } sge++; - cnt--; } *sge_ind = idx; @@ -340,7 +335,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, } } - set_extend_sge(qp, wr, sge_ind, valid_num_sge); + set_extend_sge(qp, wr->sg_list + i, sge_ind, + valid_num_sge - HNS_ROCE_SGE_IN_WQE); } roce_set_field(rc_sq_wqe->byte_16, @@ -433,8 +429,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, unsigned int curr_idx = *sge_idx; int valid_num_sge; u32 msg_len = 0; - bool loopback; - u8 *smac; int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); @@ -457,13 +451,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M, V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]); - /* MAC loopback */ - smac = (u8 *)hr_dev->dev_addr[qp->port]; - loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 
1 : 0; - - roce_set_bit(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); - ud_sq_wqe->msg_len = cpu_to_le32(msg_len); /* Set sig attr */ @@ -495,8 +482,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, @@ -508,14 +493,21 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M, V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port); - roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, - ah->av.vlan_en ? 1 : 0); roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index); + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { + roce_set_bit(ud_sq_wqe->byte_40, + V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, + ah->av.vlan_en); + roce_set_field(ud_sq_wqe->byte_36, + V2_UD_SEND_WQE_BYTE_36_VLAN_M, + V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); + } + memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); - set_extend_sge(qp, wr, &curr_idx, valid_num_sge); + set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); *sge_idx = curr_idx; @@ -4468,15 +4460,11 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - if (is_udp) - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); - else - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class); - + roce_set_field(context->byte_24_mtu_tc, 
V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, get_tclass(&attr->ah_attr.grh)); roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, 0); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, V2_QPC_BYTE_28_FL_S, grh->flow_label); roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index afeffafc59f906..ae721fa61e0e4a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -325,7 +325,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, resp.cqe_size = hr_dev->caps.cqe_sz; - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto error_fail_copy_to_udata; @@ -631,7 +632,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) goto err_unmap_trrl; } - if (hr_dev->caps.srqc_entry_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, HEM_TYPE_SRQC, hr_dev->caps.srqc_entry_sz, @@ -643,7 +644,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } - if (hr_dev->caps.sccc_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.sccc_table, HEM_TYPE_SCCC, @@ -687,11 +688,11 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); err_unmap_ctx: - if (hr_dev->caps.sccc_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); err_unmap_srq: - if (hr_dev->caps.srqc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); err_unmap_cq: diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 
98f69496adb495..f78fa1d3d8075f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -70,16 +70,17 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) } if (udata) { - struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn}; + struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn}; - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) { hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); - ibdev_err(ib_dev, "failed to copy to udata\n"); - return -EFAULT; + ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret); } } - return 0; + return ret; } int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6c081dd985fc94..ef1452215b17d8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -286,7 +286,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) } } - if (hr_dev->caps.sccc_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { /* Alloc memory for SCC CTX */ ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, hr_qp->qpn); @@ -432,7 +432,12 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, } hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - hr_qp->sge.sge_cnt = cnt; + + /* If the number of extended sge is not zero, they MUST use the + * space of HNS_HW_PAGE_SIZE at least. + */ + hr_qp->sge.sge_cnt = cnt ? 
+ max(cnt, (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE) : 0; return 0; } @@ -860,9 +865,12 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } if (udata) { - if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) { - ibdev_err(ibdev, "Failed to copy QP ucmd\n"); - return -EFAULT; + ret = ib_copy_from_udata(ucmd, udata, + min(udata->inlen, sizeof(*ucmd))); + if (ret) { + ibdev_err(ibdev, + "failed to copy QP ucmd, ret = %d\n", ret); + return ret; } ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 8caf74e44efd96..75d74f4bb52c94 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -300,7 +300,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->max_gs = init_attr->attr.max_sge; if (udata) { - ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + ret = ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd))); if (ret) { ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", ret); @@ -343,11 +344,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, resp.srqn = srq->srqn; if (udata) { - if (ib_copy_to_udata(udata, &resp, - min(udata->outlen, sizeof(resp)))) { - ret = -EFAULT; + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) goto err_srqc_alloc; - } } return 0; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 246e3cbe0b2c7c..e317d7d6d5c0d2 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3305,8 +3305,7 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) int err; dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event; - err = register_netdevice_notifier_net(mlx5_core_net(dev->mdev), - &dev->port[port_num].roce.nb); + err = register_netdevice_notifier(&dev->port[port_num].roce.nb); if (err) { 
dev->port[port_num].roce.nb.notifier_call = NULL; return err; @@ -3318,8 +3317,7 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { if (dev->port[port_num].roce.nb.notifier_call) { - unregister_netdevice_notifier_net(mlx5_core_net(dev->mdev), - &dev->port[port_num].roce.nb); + unregister_netdevice_notifier(&dev->port[port_num].roce.nb); dev->port[port_num].roce.nb.notifier_call = NULL; } } @@ -3950,7 +3948,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) err = set_has_smi_cap(dev); if (err) - return err; + goto err_mp; if (!mlx5_core_mp_enabled(mdev)) { for (i = 1; i <= dev->num_ports; i++) { @@ -4362,7 +4360,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); if (err) - mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); + mlx5_free_bfreg(dev->mdev, &dev->bfreg); return err; } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b261797b258fd7..971694e781b653 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -642,6 +642,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (mlx5_mr_cache_invalidate(mr)) { detach_mr_from_cache(mr); destroy_mkey(dev, mr); + kfree(mr); return; } @@ -1247,10 +1248,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, } static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - int npages, u64 length, int access_flags) + u64 length, int access_flags) { - mr->npages = npages; - atomic_add(npages, &dev->mdev->priv.reg_pages); mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->ibmr.length = length; @@ -1290,8 +1289,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, kfree(in); - mr->umem = NULL; - set_mr_fields(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, length, acc); 
return &mr->ibmr; @@ -1419,7 +1417,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; - set_mr_fields(dev, mr, npages, length, access_flags); + mr->npages = npages; + atomic_add(mr->npages, &dev->mdev->priv.reg_pages); + set_mr_fields(dev, mr, length, access_flags); if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { /* @@ -1531,8 +1531,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); - atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); - if (!mr->umem) return -EINVAL; @@ -1553,12 +1551,17 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * used. */ flags |= IB_MR_REREG_TRANS; + atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + mr->npages = 0; ib_umem_release(mr->umem); mr->umem = NULL; + err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, &npages, &page_shift, &ncont, &order); if (err) goto err; + mr->npages = ncont; + atomic_add(mr->npages, &dev->mdev->priv.reg_pages); } if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, @@ -1609,7 +1612,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - set_mr_fields(dev, mr, npages, len, access_flags); + set_mr_fields(dev, mr, len, access_flags); return 0; diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 119b2573c9a08c..26c3408dcacaea 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -604,7 +604,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; default: - entry->opcode = MTHCA_OPCODE_INVALID; + entry->opcode = 0xFF; break; } } else { diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 
9dbbf4d16796a4..a445160de3e16c 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -105,7 +105,6 @@ enum { MTHCA_OPCODE_ATOMIC_CS = 0x11, MTHCA_OPCODE_ATOMIC_FA = 0x12, MTHCA_OPCODE_BIND_MW = 0x18, - MTHCA_OPCODE_INVALID = 0xff }; enum { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 7350fe16f164d3..81a560056cd520 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -434,9 +434,9 @@ static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) pr_err("%s(%d) Freeing in use pdid=0x%x.\n", __func__, dev->id, pd->id); } - kfree(uctx->cntxt_pd); uctx->cntxt_pd = NULL; _ocrdma_dealloc_pd(dev, pd); + kfree(pd); } static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 9e961f8ffa10de..6a2b7d1d184cad 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -214,6 +214,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, } usnic_uiom_free_dev_list(dev_list); + dev_list = NULL; } /* Try to find resources on an unused vf */ @@ -239,6 +240,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, qp_grp_check: if (IS_ERR_OR_NULL(qp_grp)) { usnic_err("Failed to allocate qp_grp\n"); + if (usnic_ib_share_vf) + usnic_uiom_free_dev_list(dev_list); return ERR_PTR(qp_grp ? 
PTR_ERR(qp_grp) : -ENOMEM); } return qp_grp; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index c142f5e7f25f82..de57f2fed74375 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -509,6 +509,20 @@ static inline int ib_send_flags_to_pvrdma(int flags) return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX); } +static inline int pvrdma_network_type_to_ib(enum pvrdma_network_type type) +{ + switch (type) { + case PVRDMA_NETWORK_ROCE_V1: + return RDMA_NETWORK_ROCE_V1; + case PVRDMA_NETWORK_IPV4: + return RDMA_NETWORK_IPV4; + case PVRDMA_NETWORK_IPV6: + return RDMA_NETWORK_IPV6; + default: + return RDMA_NETWORK_IPV6; + } +} + void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, const struct pvrdma_qp_cap *src); void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 319546a39a0d5c..62164db593a4f2 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -364,7 +364,7 @@ static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp, wc->dlid_path_bits = cqe->dlid_path_bits; wc->port_num = cqe->port_num; wc->vendor_err = cqe->vendor_err; - wc->network_hdr_type = cqe->network_hdr_type; + wc->network_hdr_type = pvrdma_network_type_to_ib(cqe->network_hdr_type); /* Update shared ring state */ pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe); diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index c8e268082952b0..0df48b3a6b56c5 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -4,6 +4,5 @@ config INFINIBAND_RDMAVT depends on INFINIBAND_VIRT_DMA depends on X86_64 depends on PCI - select DMA_VIRT_OPS help This is a common software verbs provider for RDMA networks. 
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8490fdb9c91e50..90fc234f489acd 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -324,8 +324,6 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @acc: access flags * * Return: the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the functions in - * struct dma_virt_ops. */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) { @@ -766,7 +764,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, /* * We use LKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); @@ -877,7 +875,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, /* * We use RKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ rcu_read_lock(); if (rkey == 0) { diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 670a9623b46e11..d1bbe66610cfe4 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -524,7 +524,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) int rvt_register_device(struct rvt_dev_info *rdi) { int ret = 0, i; - u64 dma_mask; if (!rdi) return -EINVAL; @@ -579,13 +578,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) /* Completion queues */ spin_lock_init(&rdi->n_cqs_lock); - /* DMA Operations */ - rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms; - dma_mask = IS_ENABLED(CONFIG_64BIT) ? 
DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask); - if (ret) - goto bail_wss; - /* Protection Domain */ spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 8810bfa680495a..4521490667925f 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -5,7 +5,6 @@ config RDMA_RXE depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL select CRYPTO_CRC32 - select DMA_VIRT_OPS help This driver implements the InfiniBand RDMA transport over the Linux network stack. It enables a system with a diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 34bef7d8e6b41b..943914c2a50c70 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -20,18 +20,6 @@ static struct rxe_recv_sockets recv_sockets; -struct device *rxe_dma_device(struct rxe_dev *rxe) -{ - struct net_device *ndev; - - ndev = rxe->ndev; - - if (is_vlan_dev(ndev)) - ndev = vlan_dev_real_dev(ndev); - - return ndev->dev.parent; -} - int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { int err; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index af3923bf0a36bf..d4917646641aad 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -634,7 +634,8 @@ int rxe_requester(void *arg) } if (unlikely(qp_type(qp) == IB_QPT_RC && - qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) { + psn_compare(qp->req.psn, (qp->comp.psn + + RXE_MAX_UNACKED_PSNS)) > 0)) { qp->req.wait_psn = 1; goto exit; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index f9c832e82552f9..512868c2302383 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1118,23 +1118,15 @@ int rxe_register_device(struct rxe_dev *rxe, const char 
*ibdev_name) int err; struct ib_device *dev = &rxe->ib_dev; struct crypto_shash *tfm; - u64 dma_mask; strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->num_comp_vectors = num_possible_cpus(); - dev->dev.parent = rxe_dma_device(rxe); dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); - dev->dev.dma_parms = &rxe->dma_parms; - dma_set_max_seg_size(&dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask); - if (err) - return err; dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 3414b341b7091f..4bf5d85a1ab3ce 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -352,7 +352,6 @@ struct rxe_port { struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; - struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index 3450ba5081df51..1b5105cbabaeed 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -2,7 +2,6 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" depends on INET && INFINIBAND && LIBCRC32C depends on INFINIBAND_VIRT_DMA - select DMA_VIRT_OPS help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. 
It enables a system with a diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index e9753831ac3f33..adda7899621962 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -69,7 +69,6 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; - struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 181e06c1c43d7e..9d152e198a59bf 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -305,25 +305,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) { struct siw_device *sdev = NULL; struct ib_device *base_dev; - struct device *parent = netdev->dev.parent; - u64 dma_mask; int rv; - if (!parent) { - /* - * The loopback device has no parent device, - * so it appears as a top-level device. To support - * loopback device connectivity, take this device - * as the parent device. Skip all other devices - * w/o parent device. - */ - if (netdev->type != ARPHRD_LOOPBACK) { - pr_warn("siw: device %s error: no parent device\n", - netdev->name); - return NULL; - } - parent = &netdev->dev; - } sdev = ib_alloc_device(siw_device, base_dev); if (!sdev) return NULL; @@ -382,13 +365,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) * per physical port. */ base_dev->phys_port_cnt = 1; - base_dev->dev.parent = parent; - base_dev->dev.dma_parms = &sdev->dma_parms; - dma_set_max_seg_size(&base_dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? 
DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask)) - goto error; - base_dev->num_comp_vectors = num_possible_cpus(); xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); @@ -430,7 +406,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) atomic_set(&sdev->num_mr, 0); atomic_set(&sdev->num_pd, 0); - sdev->numa_node = dev_to_node(parent); + sdev->numa_node = dev_to_node(&netdev->dev); spin_lock_init(&sdev->lock); return sdev; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index f298adc02acba2..d54a77ebe11840 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1640,10 +1640,8 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) return err; } err = rdma_resolve_route(con->c.cm_id, RTRS_CONNECT_TIMEOUT_MS); - if (err) { + if (err) rtrs_err(s, "Resolving route failed, err: %d\n", err); - destroy_con_cq_qp(con); - } return err; } @@ -1837,8 +1835,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, cm_err = rtrs_rdma_route_resolved(con); break; case RDMA_CM_EVENT_ESTABLISHED: - con->cm_err = rtrs_rdma_conn_established(con, ev); - if (likely(!con->cm_err)) { + cm_err = rtrs_rdma_conn_established(con, ev); + if (likely(!cm_err)) { /* * Report success and wake up. Here we abuse state_wq, * i.e. wake up without state change, but we set cm_err. 
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index d6f93601712e49..1cb778aff3c59a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1328,17 +1328,42 @@ static void rtrs_srv_dev_release(struct device *dev) kfree(srv); } -static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) +static void free_srv(struct rtrs_srv *srv) +{ + int i; + + WARN_ON(refcount_read(&srv->refcount)); + for (i = 0; i < srv->queue_depth; i++) + mempool_free(srv->chunks[i], chunk_pool); + kfree(srv->chunks); + mutex_destroy(&srv->paths_mutex); + mutex_destroy(&srv->paths_ev_mutex); + /* last put to release the srv structure */ + put_device(&srv->dev); +} + +static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, + const uuid_t *paths_uuid) { struct rtrs_srv *srv; int i; + mutex_lock(&ctx->srv_mutex); + list_for_each_entry(srv, &ctx->srv_list, ctx_list) { + if (uuid_equal(&srv->paths_uuid, paths_uuid) && + refcount_inc_not_zero(&srv->refcount)) { + mutex_unlock(&ctx->srv_mutex); + return srv; + } + } + + /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); - if (!srv) + if (!srv) { + mutex_unlock(&ctx->srv_mutex); return NULL; + } - refcount_set(&srv->refcount, 1); INIT_LIST_HEAD(&srv->paths_list); mutex_init(&srv->paths_mutex); mutex_init(&srv->paths_ev_mutex); @@ -1347,6 +1372,8 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, srv->ctx = ctx; device_initialize(&srv->dev); srv->dev.release = rtrs_srv_dev_release; + list_add(&srv->ctx_list, &ctx->srv_list); + mutex_unlock(&ctx->srv_mutex); srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); @@ -1358,7 +1385,7 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, if (!srv->chunks[i]) goto err_free_chunks; } - list_add(&srv->ctx_list, &ctx->srv_list); + refcount_set(&srv->refcount, 1); return srv; @@ -1369,52 +1396,9 @@ static struct 
rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, err_free_srv: kfree(srv); - return NULL; } -static void free_srv(struct rtrs_srv *srv) -{ - int i; - - WARN_ON(refcount_read(&srv->refcount)); - for (i = 0; i < srv->queue_depth; i++) - mempool_free(srv->chunks[i], chunk_pool); - kfree(srv->chunks); - mutex_destroy(&srv->paths_mutex); - mutex_destroy(&srv->paths_ev_mutex); - /* last put to release the srv structure */ - put_device(&srv->dev); -} - -static inline struct rtrs_srv *__find_srv_and_get(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - list_for_each_entry(srv, &ctx->srv_list, ctx_list) { - if (uuid_equal(&srv->paths_uuid, paths_uuid) && - refcount_inc_not_zero(&srv->refcount)) - return srv; - } - - return NULL; -} - -static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - mutex_lock(&ctx->srv_mutex); - srv = __find_srv_and_get(ctx, paths_uuid); - if (!srv) - srv = __alloc_srv(ctx, paths_uuid); - mutex_unlock(&ctx->srv_mutex); - - return srv; -} - static void put_srv(struct rtrs_srv *srv) { if (refcount_dec_and_test(&srv->refcount)) { @@ -1813,7 +1797,11 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, } recon_cnt = le16_to_cpu(msg->recon_cnt); srv = get_or_create_srv(ctx, &msg->paths_uuid); - if (!srv) { + /* + * "refcount == 0" happens if a previous thread calls get_or_create_srv + * allocate srv, but chunks of srv are not allocated yet. 
+ */ + if (!srv || refcount_read(&srv->refcount) == 0) { err = -ENOMEM; goto reject_w_err; } diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index d6c924032aaa81..dd16f7b3c7ef68 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -186,12 +186,8 @@ static int omap4_keypad_open(struct input_dev *input) return 0; } -static void omap4_keypad_close(struct input_dev *input) +static void omap4_keypad_stop(struct omap4_keypad *keypad_data) { - struct omap4_keypad *keypad_data = input_get_drvdata(input); - - disable_irq(keypad_data->irq); - /* Disable interrupts and wake-up events */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE, OMAP4_VAL_IRQDISABLE); @@ -200,7 +196,15 @@ static void omap4_keypad_close(struct input_dev *input) /* clear pending interrupts */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS, kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)); +} + +static void omap4_keypad_close(struct input_dev *input) +{ + struct omap4_keypad *keypad_data; + keypad_data = input_get_drvdata(input); + disable_irq(keypad_data->irq); + omap4_keypad_stop(keypad_data); enable_irq(keypad_data->irq); pm_runtime_put_sync(input->dev.parent); @@ -223,13 +227,37 @@ static int omap4_keypad_parse_dt(struct device *dev, return 0; } +static int omap4_keypad_check_revision(struct device *dev, + struct omap4_keypad *keypad_data) +{ + unsigned int rev; + + rev = __raw_readl(keypad_data->base + OMAP4_KBD_REVISION); + rev &= 0x03 << 30; + rev >>= 30; + switch (rev) { + case KBD_REVISION_OMAP4: + keypad_data->reg_offset = 0x00; + keypad_data->irqreg_offset = 0x00; + break; + case KBD_REVISION_OMAP5: + keypad_data->reg_offset = 0x10; + keypad_data->irqreg_offset = 0x0c; + break; + default: + dev_err(dev, "Keypad reports unsupported revision %d", rev); + return -EINVAL; + } + + return 0; +} + static int omap4_keypad_probe(struct platform_device *pdev) { struct omap4_keypad *keypad_data; struct input_dev 
*input_dev; struct resource *res; unsigned int max_keys; - int rev; int irq; int error; @@ -269,41 +297,33 @@ static int omap4_keypad_probe(struct platform_device *pdev) goto err_release_mem; } + pm_runtime_enable(&pdev->dev); /* * Enable clocks for the keypad module so that we can read * revision register. */ - pm_runtime_enable(&pdev->dev); error = pm_runtime_get_sync(&pdev->dev); if (error) { dev_err(&pdev->dev, "pm_runtime_get_sync() failed\n"); - goto err_unmap; - } - rev = __raw_readl(keypad_data->base + OMAP4_KBD_REVISION); - rev &= 0x03 << 30; - rev >>= 30; - switch (rev) { - case KBD_REVISION_OMAP4: - keypad_data->reg_offset = 0x00; - keypad_data->irqreg_offset = 0x00; - break; - case KBD_REVISION_OMAP5: - keypad_data->reg_offset = 0x10; - keypad_data->irqreg_offset = 0x0c; - break; - default: - dev_err(&pdev->dev, - "Keypad reports unsupported revision %d", rev); - error = -EINVAL; - goto err_pm_put_sync; + pm_runtime_put_noidle(&pdev->dev); + } else { + error = omap4_keypad_check_revision(&pdev->dev, + keypad_data); + if (!error) { + /* Ensure device does not raise interrupts */ + omap4_keypad_stop(keypad_data); + } + pm_runtime_put_sync(&pdev->dev); } + if (error) + goto err_pm_disable; /* input device allocation */ keypad_data->input = input_dev = input_allocate_device(); if (!input_dev) { error = -ENOMEM; - goto err_pm_put_sync; + goto err_pm_disable; } input_dev->name = pdev->name; @@ -349,28 +369,25 @@ static int omap4_keypad_probe(struct platform_device *pdev) goto err_free_keymap; } - device_init_wakeup(&pdev->dev, true); - pm_runtime_put_sync(&pdev->dev); - error = input_register_device(keypad_data->input); if (error < 0) { dev_err(&pdev->dev, "failed to register input device\n"); - goto err_pm_disable; + goto err_free_irq; } + device_init_wakeup(&pdev->dev, true); platform_set_drvdata(pdev, keypad_data); + return 0; -err_pm_disable: - pm_runtime_disable(&pdev->dev); +err_free_irq: free_irq(keypad_data->irq, keypad_data); err_free_keymap: 
kfree(keypad_data->keymap); err_free_input: input_free_device(input_dev); -err_pm_put_sync: - pm_runtime_put_sync(&pdev->dev); -err_unmap: +err_pm_disable: + pm_runtime_disable(&pdev->dev); iounmap(keypad_data->base); err_release_mem: release_mem_region(res->start, resource_size(res)); diff --git a/drivers/input/mouse/cyapa_gen6.c b/drivers/input/mouse/cyapa_gen6.c index 7eba66fbef580b..812edfced86eee 100644 --- a/drivers/input/mouse/cyapa_gen6.c +++ b/drivers/input/mouse/cyapa_gen6.c @@ -573,7 +573,7 @@ static int cyapa_pip_retrieve_data_structure(struct cyapa *cyapa, memset(&cmd, 0, sizeof(cmd)); put_unaligned_le16(PIP_OUTPUT_REPORT_ADDR, &cmd.head.addr); - put_unaligned_le16(sizeof(cmd), &cmd.head.length - 2); + put_unaligned_le16(sizeof(cmd) - 2, &cmd.head.length); cmd.head.report_id = PIP_APP_CMD_REPORT_ID; cmd.head.cmd_code = PIP_RETRIEVE_DATA_STRUCTURE; put_unaligned_le16(read_offset, &cmd.read_offset); diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 8fd7fc39c4fd7b..ff97897feaf2a8 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -33,6 +33,7 @@ #include #include #include +#include /* * This code has been heavily tested on a Nokia 770, and lightly @@ -199,6 +200,26 @@ struct ads7846 { #define REF_ON (READ_12BIT_DFR(x, 1, 1)) #define REF_OFF (READ_12BIT_DFR(y, 0, 0)) +static int get_pendown_state(struct ads7846 *ts) +{ + if (ts->get_pendown_state) + return ts->get_pendown_state(); + + return !gpio_get_value(ts->gpio_pendown); +} + +static void ads7846_report_pen_up(struct ads7846 *ts) +{ + struct input_dev *input = ts->input; + + input_report_key(input, BTN_TOUCH, 0); + input_report_abs(input, ABS_PRESSURE, 0); + input_sync(input); + + ts->pendown = false; + dev_vdbg(&ts->spi->dev, "UP\n"); +} + /* Must be called with ts->lock held */ static void ads7846_stop(struct ads7846 *ts) { @@ -215,6 +236,10 @@ static void ads7846_stop(struct ads7846 *ts) static void 
ads7846_restart(struct ads7846 *ts) { if (!ts->disabled && !ts->suspended) { + /* Check if pen was released since last stop */ + if (ts->pendown && !get_pendown_state(ts)) + ads7846_report_pen_up(ts); + /* Tell IRQ thread that it may poll the device. */ ts->stopped = false; mb(); @@ -411,7 +436,7 @@ static int ads7845_read12_ser(struct device *dev, unsigned command) if (status == 0) { /* BE12 value, then padding */ - status = be16_to_cpu(*((u16 *)&req->sample[1])); + status = get_unaligned_be16(&req->sample[1]); status = status >> 3; status &= 0x0fff; } @@ -606,14 +631,6 @@ static const struct attribute_group ads784x_attr_group = { /*--------------------------------------------------------------------------*/ -static int get_pendown_state(struct ads7846 *ts) -{ - if (ts->get_pendown_state) - return ts->get_pendown_state(); - - return !gpio_get_value(ts->gpio_pendown); -} - static void null_wait_for_sync(void) { } @@ -786,10 +803,11 @@ static void ads7846_report_state(struct ads7846 *ts) /* compute touch pressure resistance using equation #2 */ Rt = z2; Rt -= z1; - Rt *= x; Rt *= ts->x_plate_ohms; + Rt = DIV_ROUND_CLOSEST(Rt, 16); + Rt *= x; Rt /= z1; - Rt = (Rt + 2047) >> 12; + Rt = DIV_ROUND_CLOSEST(Rt, 256); } else { Rt = 0; } @@ -868,16 +886,8 @@ static irqreturn_t ads7846_irq(int irq, void *handle) msecs_to_jiffies(TS_POLL_PERIOD)); } - if (ts->pendown && !ts->stopped) { - struct input_dev *input = ts->input; - - input_report_key(input, BTN_TOUCH, 0); - input_report_abs(input, ABS_PRESSURE, 0); - input_sync(input); - - ts->pendown = false; - dev_vdbg(&ts->spi->dev, "UP\n"); - } + if (ts->pendown && !ts->stopped) + ads7846_report_pen_up(ts); return IRQ_HANDLED; } diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c index 41dba7090c2ae9..e398ebf1dbbabe 100644 --- a/drivers/interconnect/imx/imx.c +++ b/drivers/interconnect/imx/imx.c @@ -99,6 +99,7 @@ static int imx_icc_node_init_qos(struct icc_provider *provider, if (!dn || 
!of_device_is_available(dn)) { dev_warn(dev, "Missing property %s, skip scaling %s\n", adj->phandle_name, node->name); + of_node_put(dn); return 0; } diff --git a/drivers/interconnect/imx/imx8mq.c b/drivers/interconnect/imx/imx8mq.c index ba43a15aefec0d..d7768d3c6d8aa1 100644 --- a/drivers/interconnect/imx/imx8mq.c +++ b/drivers/interconnect/imx/imx8mq.c @@ -7,6 +7,7 @@ #include #include +#include #include #include "imx.h" @@ -94,6 +95,7 @@ static struct platform_driver imx8mq_icc_driver = { .remove = imx8mq_icc_remove, .driver = { .name = "imx8mq-interconnect", + .sync_state = icc_sync_state, }, }; diff --git a/drivers/interconnect/qcom/Kconfig b/drivers/interconnect/qcom/Kconfig index a8f93ba265f810..b3fb5b02bcf1ea 100644 --- a/drivers/interconnect/qcom/Kconfig +++ b/drivers/interconnect/qcom/Kconfig @@ -42,13 +42,23 @@ config INTERCONNECT_QCOM_QCS404 This is a driver for the Qualcomm Network-on-Chip on qcs404-based platforms. +config INTERCONNECT_QCOM_RPMH_POSSIBLE + tristate + default INTERCONNECT_QCOM + depends on QCOM_RPMH || (COMPILE_TEST && !QCOM_RPMH) + depends on QCOM_COMMAND_DB || (COMPILE_TEST && !QCOM_COMMAND_DB) + depends on OF || COMPILE_TEST + help + Compile-testing RPMH drivers is possible on other platforms, + but in order to avoid link failures, drivers must not be built-in + when QCOM_RPMH or QCOM_COMMAND_DB are loadable modules + config INTERCONNECT_QCOM_RPMH tristate config INTERCONNECT_QCOM_SC7180 tristate "Qualcomm SC7180 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -57,8 +67,7 @@ config INTERCONNECT_QCOM_SC7180 config INTERCONNECT_QCOM_SDM845 tristate "Qualcomm SDM845 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select 
INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -67,8 +76,7 @@ config INTERCONNECT_QCOM_SDM845 config INTERCONNECT_QCOM_SM8150 tristate "Qualcomm SM8150 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -77,8 +85,7 @@ config INTERCONNECT_QCOM_SM8150 config INTERCONNECT_QCOM_SM8250 tristate "Qualcomm SM8250 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 6b8cbdf7171407..b4adab69856323 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -84,12 +84,9 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev) (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); } -static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) +static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask) { - if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) - return false; - - return !!(iommu->features & f); + return !!(iommu->features & mask); } static inline u64 iommu_virt_to_phys(void *vaddr) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 494b42a31b7aed..33446c9d3bac81 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -379,6 +379,10 @@ #define IOMMU_CAP_NPCACHE 26 #define IOMMU_CAP_EFR 27 +/* IOMMU IVINFO */ +#define IOMMU_IVINFO_OFFSET 36 +#define IOMMU_IVINFO_EFRSUP BIT(0) + /* IOMMU Feature Reporting Field (for IVHD type 10h */ #define IOMMU_FEAT_GASUP_SHIFT 6 diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 23a790f8f55061..c842545368fdd5 100644 --- a/drivers/iommu/amd/init.c +++ 
b/drivers/iommu/amd/init.c @@ -257,6 +257,8 @@ static void init_device_table_dma(void); static bool amd_iommu_pre_enabled = true; +static u32 amd_iommu_ivinfo __initdata; + bool translation_pre_enabled(struct amd_iommu *iommu) { return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); @@ -296,6 +298,18 @@ int amd_iommu_get_num_iommus(void) return amd_iommus_present; } +/* + * For IVHD type 0x11/0x40, EFR is also available via IVHD. + * Default to IVHD EFR since it is available sooner + * (i.e. before PCI init). + */ +static void __init early_iommu_features_init(struct amd_iommu *iommu, + struct ivhd_header *h) +{ + if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) + iommu->features = h->efr_reg; +} + /* Access to l1 and l2 indexed register spaces */ static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) @@ -1584,6 +1598,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) && (h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT))) amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; + + early_iommu_features_init(iommu, h); + break; default: return -EINVAL; @@ -1775,6 +1792,35 @@ static const struct attribute_group *amd_iommu_groups[] = { NULL, }; +/* + * Note: IVHD 0x11 and 0x40 also contains exact copy + * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. + * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). + */ +static void __init late_iommu_features_init(struct amd_iommu *iommu) +{ + u64 features; + + if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) + return; + + /* read extended feature bits */ + features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); + + if (!iommu->features) { + iommu->features = features; + return; + } + + /* + * Sanity check and warn if EFR values from + * IVHD and MMIO conflict. + */ + if (features != iommu->features) + pr_warn(FW_WARN "EFR mismatch. 
Use IVHD EFR (%#llx : %#llx\n).", + features, iommu->features); +} + static int __init iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; @@ -1794,8 +1840,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) amd_iommu_iotlb_sup = false; - /* read extended feature bits */ - iommu->features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); + late_iommu_features_init(iommu); if (iommu_feature(iommu, FEATURE_GT)) { int glxval; @@ -2525,6 +2570,11 @@ static void __init free_dma_resources(void) free_unity_maps(); } +static void __init ivinfo_init(void *ivrs) +{ + amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); +} + /* * This is the hardware init function for AMD IOMMU in the system. * This function is called either from amd_iommu_init or from the interrupt @@ -2579,6 +2629,8 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + ivinfo_init(ivrs_base); + amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 702fbaa6c9ada1..0eba5e883e3f1d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -10,8 +10,15 @@ struct qcom_smmu { struct arm_smmu_device smmu; + bool bypass_quirk; + u8 bypass_cbndx; }; +static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu) +{ + return container_of(smmu, struct qcom_smmu, smmu); +} + static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,adreno" }, { .compatible = "qcom,mdp4" }, @@ -23,6 +30,89 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { } }; +static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) +{ + unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); + struct qcom_smmu *qsmmu = 
to_qcom_smmu(smmu); + u32 reg; + u32 smr; + int i; + + /* + * With some firmware versions writes to S2CR of type FAULT are + * ignored, and writing BYPASS will end up written as FAULT in the + * register. Perform a write to S2CR to detect if this is the case and + * if so reserve a context bank to emulate bypass streams. + */ + reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, S2CR_TYPE_BYPASS) | + FIELD_PREP(ARM_SMMU_S2CR_CBNDX, 0xff) | + FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, S2CR_PRIVCFG_DEFAULT); + arm_smmu_gr0_write(smmu, last_s2cr, reg); + reg = arm_smmu_gr0_read(smmu, last_s2cr); + if (FIELD_GET(ARM_SMMU_S2CR_TYPE, reg) != S2CR_TYPE_BYPASS) { + qsmmu->bypass_quirk = true; + qsmmu->bypass_cbndx = smmu->num_context_banks - 1; + + set_bit(qsmmu->bypass_cbndx, smmu->context_map); + + arm_smmu_cb_write(smmu, qsmmu->bypass_cbndx, ARM_SMMU_CB_SCTLR, 0); + + reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, CBAR_TYPE_S1_TRANS_S2_BYPASS); + arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(qsmmu->bypass_cbndx), reg); + } + + for (i = 0; i < smmu->num_mapping_groups; i++) { + smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); + + if (FIELD_GET(ARM_SMMU_SMR_VALID, smr)) { + smmu->smrs[i].id = FIELD_GET(ARM_SMMU_SMR_ID, smr); + smmu->smrs[i].mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr); + smmu->smrs[i].valid = true; + + smmu->s2crs[i].type = S2CR_TYPE_BYPASS; + smmu->s2crs[i].privcfg = S2CR_PRIVCFG_DEFAULT; + smmu->s2crs[i].cbndx = 0xff; + } + } + + return 0; +} + +static void qcom_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx) +{ + struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx; + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + u32 cbndx = s2cr->cbndx; + u32 type = s2cr->type; + u32 reg; + + if (qsmmu->bypass_quirk) { + if (type == S2CR_TYPE_BYPASS) { + /* + * Firmware with quirky S2CR handling will substitute + * BYPASS writes with FAULT, so point the stream to the + * reserved context bank and ask for translation on the + * stream + */ + type = S2CR_TYPE_TRANS; + cbndx = qsmmu->bypass_cbndx; + } 
else if (type == S2CR_TYPE_FAULT) { + /* + * Firmware with quirky S2CR handling will ignore FAULT + * writes, so trick it to write FAULT by asking for a + * BYPASS. + */ + type = S2CR_TYPE_BYPASS; + cbndx = 0xff; + } + } + + reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, type) | + FIELD_PREP(ARM_SMMU_S2CR_CBNDX, cbndx) | + FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg); +} + static int qcom_smmu_def_domain_type(struct device *dev) { const struct of_device_id *match = @@ -61,8 +151,10 @@ static int qcom_smmu500_reset(struct arm_smmu_device *smmu) } static const struct arm_smmu_impl qcom_smmu_impl = { + .cfg_probe = qcom_smmu_cfg_probe, .def_domain_type = qcom_smmu_def_domain_type, .reset = qcom_smmu500_reset, + .write_s2cr = qcom_smmu_write_s2cr, }; struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index dad7fa86fbd4cf..bcbacf22331d60 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -929,9 +929,16 @@ static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx) static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx) { struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx; - u32 reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) | - FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) | - FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg); + u32 reg; + + if (smmu->impl && smmu->impl->write_s2cr) { + smmu->impl->write_s2cr(smmu, idx); + return; + } + + reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) | + FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) | + FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg); if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs && smmu->smrs[idx].valid) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index 1a746476927c99..b71647eaa319b2 100644 --- 
a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -436,6 +436,7 @@ struct arm_smmu_impl { int (*alloc_context_bank)(struct arm_smmu_domain *smmu_domain, struct arm_smmu_device *smmu, struct device *dev, int start); + void (*write_s2cr)(struct arm_smmu_device *smmu, int idx); }; #define INVALID_SMENDX -1 diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b46dbfa6d0ed69..02e7c10a4224b0 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1461,8 +1461,8 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, int mask = ilog2(__roundup_pow_of_two(npages)); unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); - if (WARN_ON_ONCE(!ALIGN(addr, align))) - addr &= ~(align - 1); + if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) + addr = ALIGN_DOWN(addr, align); desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | @@ -1496,7 +1496,7 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, * Max Invs Pending (MIP) is set to 0 for now until we have DIT in * ECAP. */ - if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0)) + if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)) pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", addr, size_order); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a49afa11673cc5..7e3db4c0324d39 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -67,8 +67,8 @@ #define MAX_AGAW_WIDTH 64 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) -#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1) -#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1) +#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1) +#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1) /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR to match. 
That way, we can use 'unsigned long' for PFNs with impunity. */ @@ -739,6 +739,18 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) */ if (domain->nid == NUMA_NO_NODE) domain->nid = domain_update_device_node(domain); + + /* + * First-level translation restricts the input-address to a + * canonical address (i.e., address bits 63:N have the same + * value as address bit [N-1], where N is 48-bits with 4-level + * paging and 57-bits with 5-level paging). Hence, skip bit + * [N-1]. + */ + if (domain_use_first_level(domain)) + domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); + else + domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); } struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, @@ -3338,6 +3350,11 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; + + if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) { + pr_warn("Disable batched IOTLB flush due to virtualization"); + intel_iommu_strict = 1; + } intel_svm_check(iommu); } @@ -5387,6 +5404,7 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain, aux_domain_remove_dev(to_dmar_domain(domain), dev); } +#ifdef CONFIG_INTEL_IOMMU_SVM /* * 2D array for converting and sanitizing IOMMU generic TLB granularity to * VT-d granularity. 
Invalidation is typically included in the unmap operation @@ -5433,7 +5451,6 @@ static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules) return order_base_2(nr_pages); } -#ifdef CONFIG_INTEL_IOMMU_SVM static int intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, struct iommu_cache_invalidate_info *inv_info) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 0cfce1d3b7bbd8..aedaae4630bc81 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1390,6 +1390,8 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, irq_data = irq_domain_get_irq_data(domain, virq + i); irq_cfg = irqd_cfg(irq_data); if (!irq_data || !irq_cfg) { + if (!i) + kfree(data); ret = -EINVAL; goto out_free_data; } diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 3242ebd0bca36a..43f392d27d3188 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -118,8 +118,10 @@ void intel_svm_check(struct intel_iommu *iommu) iommu->flags |= VTD_FLAG_SVM_CAPABLE; } -static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, - unsigned long address, unsigned long pages, int ih) +static void __flush_svm_range_dev(struct intel_svm *svm, + struct intel_svm_dev *sdev, + unsigned long address, + unsigned long pages, int ih) { struct qi_desc desc; @@ -142,7 +144,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(svm->iommu, &desc, 1, 0); + qi_submit_sync(sdev->iommu, &desc, 1, 0); if (sdev->dev_iotlb) { desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | @@ -166,7 +168,23 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(svm->iommu, &desc, 1, 0); + qi_submit_sync(sdev->iommu, &desc, 1, 0); + } +} + +static void intel_flush_svm_range_dev(struct intel_svm *svm, + struct 
intel_svm_dev *sdev, + unsigned long address, + unsigned long pages, int ih) +{ + unsigned long shift = ilog2(__roundup_pow_of_two(pages)); + unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift)); + unsigned long start = ALIGN_DOWN(address, align); + unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align); + + while (start < end) { + __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih); + start += align; } } @@ -211,7 +229,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) - intel_pasid_tear_down_entry(svm->iommu, sdev->dev, + intel_pasid_tear_down_entry(sdev->iommu, sdev->dev, svm->pasid, true); rcu_read_unlock(); @@ -281,6 +299,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, struct dmar_domain *dmar_domain; struct device_domain_info *info; struct intel_svm *svm = NULL; + unsigned long iflags; int ret = 0; if (WARN_ON(!iommu) || !data) @@ -363,6 +382,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, } sdev->dev = dev; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->iommu = iommu; /* Only count users if device has aux domains */ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) @@ -381,12 +401,12 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, * each bind of a new device even with an existing PASID, we need to * call the nested mode setup function here. 
*/ - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_nested(iommu, dev, (pgd_t *)(uintptr_t)data->gpgd, data->hpasid, &data->vendor.vtd, dmar_domain, data->addr_width); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n", data->hpasid, ret); @@ -486,6 +506,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, struct device_domain_info *info; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; + unsigned long iflags; int pasid_max; int ret; @@ -546,6 +567,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, goto out; } sdev->dev = dev; + sdev->iommu = iommu; ret = intel_iommu_enable_pasid(iommu, dev); if (ret) { @@ -575,7 +597,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, kfree(sdev); goto out; } - svm->iommu = iommu; if (pasid_max > intel_pasid_max_id) pasid_max = intel_pasid_max_id; @@ -605,14 +626,14 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, } } - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_first_level(iommu, dev, mm ? mm->pgd : init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID, (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { if (mm) mmu_notifier_unregister(&svm->notifier, mm); @@ -632,14 +653,14 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, * Binding a new device with existing PASID, need to setup * the PASID entry. */ - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_first_level(iommu, dev, mm ? mm->pgd : init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID, (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | (cpu_feature_enabled(X86_FEATURE_LA57) ? 
PASID_FLAG_FL5LP : 0)); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { kfree(sdev); goto out; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index a7a9bc08dcd115..bcfbd0e44a4a07 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -417,7 +417,13 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, << ARM_LPAE_PTE_ATTRINDX_SHIFT); } - if (prot & IOMMU_CACHE) + /* + * Also Mali has its own notions of shareability wherein its Inner + * domain covers the cores within the GPU, and its Outer domain is + * "outside the GPU" (i.e. either the Inner or System domain in CPU + * terms, depending on coherency). + */ + if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE) pte |= ARM_LPAE_PTE_SH_IS; else pte |= ARM_LPAE_PTE_SH_OS; @@ -1021,6 +1027,9 @@ arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) | ARM_MALI_LPAE_TTBR_READ_INNER | ARM_MALI_LPAE_TTBR_ADRMODE_TABLE; + if (cfg->coherent_walk) + cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER; + return &data->iop; out_free_data: diff --git a/drivers/irqchip/irq-alpine-msi.c b/drivers/irqchip/irq-alpine-msi.c index 23a3b877f7f1df..ede02dc2bcd0b9 100644 --- a/drivers/irqchip/irq-alpine-msi.c +++ b/drivers/irqchip/irq-alpine-msi.c @@ -165,8 +165,7 @@ static int alpine_msix_middle_domain_alloc(struct irq_domain *domain, return 0; err_sgi: - while (--i >= 0) - irq_domain_free_irqs_parent(domain, virq, i); + irq_domain_free_irqs_parent(domain, virq, i - 1); alpine_msix_free_sgi(priv, sgi, nr_irqs); return err; } diff --git a/drivers/irqchip/irq-mips-cpu.c b/drivers/irqchip/irq-mips-cpu.c index 95d4fd8f7a9681..0bbb0b2d0dd5f7 100644 --- a/drivers/irqchip/irq-mips-cpu.c +++ b/drivers/irqchip/irq-mips-cpu.c @@ -197,6 +197,13 @@ static int mips_cpu_ipi_alloc(struct irq_domain *domain, unsigned int virq, if (ret) 
return ret; + ret = irq_domain_set_hwirq_and_chip(domain->parent, virq + i, hwirq, + &mips_mt_cpu_irq_controller, + NULL); + + if (ret) + return ret; + ret = irq_set_irq_type(virq + i, IRQ_TYPE_LEVEL_HIGH); if (ret) return ret; diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c index b2ab8db439d928..532d0ae172d9f8 100644 --- a/drivers/irqchip/irq-ti-sci-inta.c +++ b/drivers/irqchip/irq-ti-sci-inta.c @@ -726,7 +726,7 @@ static int ti_sci_inta_irq_domain_probe(struct platform_device *pdev) INIT_LIST_HEAD(&inta->vint_list); mutex_init(&inta->vint_mutex); - dev_info(dev, "Interrupt Aggregator domain %d created\n", pdev->id); + dev_info(dev, "Interrupt Aggregator domain %d created\n", inta->ti_sci_id); return 0; } diff --git a/drivers/irqchip/irq-ti-sci-intr.c b/drivers/irqchip/irq-ti-sci-intr.c index ac9d6d658e65cb..fe8fad22bcf962 100644 --- a/drivers/irqchip/irq-ti-sci-intr.c +++ b/drivers/irqchip/irq-ti-sci-intr.c @@ -129,7 +129,7 @@ static void ti_sci_intr_irq_domain_free(struct irq_domain *domain, * @virq: Corresponding Linux virtual IRQ number * @hwirq: Corresponding hwirq for the IRQ within this IRQ domain * - * Returns parent irq if all went well else appropriate error pointer. + * Returns intr output irq if all went well else appropriate error pointer. 
*/ static int ti_sci_intr_alloc_parent_irq(struct irq_domain *domain, unsigned int virq, u32 hwirq) @@ -173,7 +173,7 @@ static int ti_sci_intr_alloc_parent_irq(struct irq_domain *domain, if (err) goto err_msg; - return p_hwirq; + return out_irq; err_msg: irq_domain_free_irqs_parent(domain, virq, 1); @@ -198,19 +198,19 @@ static int ti_sci_intr_irq_domain_alloc(struct irq_domain *domain, struct irq_fwspec *fwspec = data; unsigned long hwirq; unsigned int flags; - int err, p_hwirq; + int err, out_irq; err = ti_sci_intr_irq_domain_translate(domain, fwspec, &hwirq, &flags); if (err) return err; - p_hwirq = ti_sci_intr_alloc_parent_irq(domain, virq, hwirq); - if (p_hwirq < 0) - return p_hwirq; + out_irq = ti_sci_intr_alloc_parent_irq(domain, virq, hwirq); + if (out_irq < 0) + return out_irq; irq_domain_set_hwirq_and_chip(domain, virq, hwirq, &ti_sci_intr_irq_chip, - (void *)(uintptr_t)p_hwirq); + (void *)(uintptr_t)out_irq); return 0; } diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index bd39e9de6ecf73..5dc63c20b67ea9 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -159,6 +159,8 @@ static int qcom_pdc_gic_set_type(struct irq_data *d, unsigned int type) { int pin_out = d->hwirq; enum pdc_irq_config_bits pdc_type; + enum pdc_irq_config_bits old_pdc_type; + int ret; if (pin_out == GPIO_NO_WAKE_IRQ) return 0; @@ -187,9 +189,26 @@ static int qcom_pdc_gic_set_type(struct irq_data *d, unsigned int type) return -EINVAL; } + old_pdc_type = pdc_reg_read(IRQ_i_CFG, pin_out); pdc_reg_write(IRQ_i_CFG, pin_out, pdc_type); - return irq_chip_set_type_parent(d, type); + ret = irq_chip_set_type_parent(d, type); + if (ret) + return ret; + + /* + * When we change types the PDC can give a phantom interrupt. + * Clear it. Specifically the phantom shows up when reconfiguring + * polarity of interrupt without changing the state of the signal + * but let's be consistent and clear it always. 
+ * + * Doing this works because we have IRQCHIP_SET_TYPE_MASKED so the + * interrupt will be cleared before the rest of the system sees it. + */ + if (old_pdc_type != pdc_type) + irq_chip_set_parent_state(d, IRQCHIP_STATE_PENDING, false); + + return 0; } static struct irq_chip qcom_pdc_gic_chip = { diff --git a/drivers/isdn/mISDN/Kconfig b/drivers/isdn/mISDN/Kconfig index 26cf0ac9c4ad0e..c9a53c2224728d 100644 --- a/drivers/isdn/mISDN/Kconfig +++ b/drivers/isdn/mISDN/Kconfig @@ -13,6 +13,7 @@ if MISDN != n config MISDN_DSP tristate "Digital Audio Processing of transparent data" depends on MISDN + select BITREVERSE help Enable support for digital audio processing capability. diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index 91da90cfb11d9f..4e7b78a84149be 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -378,14 +378,15 @@ void led_trigger_event(struct led_trigger *trig, enum led_brightness brightness) { struct led_classdev *led_cdev; + unsigned long flags; if (!trig) return; - read_lock(&trig->leddev_list_lock); + read_lock_irqsave(&trig->leddev_list_lock, flags); list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list) led_set_brightness(led_cdev, brightness); - read_unlock(&trig->leddev_list_lock); + read_unlock_irqrestore(&trig->leddev_list_lock, flags); } EXPORT_SYMBOL_GPL(led_trigger_event); @@ -396,11 +397,12 @@ static void led_trigger_blink_setup(struct led_trigger *trig, int invert) { struct led_classdev *led_cdev; + unsigned long flags; if (!trig) return; - read_lock(&trig->leddev_list_lock); + read_lock_irqsave(&trig->leddev_list_lock, flags); list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list) { if (oneshot) led_blink_set_oneshot(led_cdev, delay_on, delay_off, @@ -408,7 +410,7 @@ static void led_trigger_blink_setup(struct led_trigger *trig, else led_blink_set(led_cdev, delay_on, delay_off); } - read_unlock(&trig->leddev_list_lock); + read_unlock_irqrestore(&trig->leddev_list_lock, flags); } 
void led_trigger_blink(struct led_trigger *trig, diff --git a/drivers/leds/leds-lp50xx.c b/drivers/leds/leds-lp50xx.c index 5fb4f24aeb2e86..f13117eed976d9 100644 --- a/drivers/leds/leds-lp50xx.c +++ b/drivers/leds/leds-lp50xx.c @@ -487,8 +487,10 @@ static int lp50xx_probe_dt(struct lp50xx *priv) */ mc_led_info = devm_kcalloc(priv->dev, LP50XX_LEDS_PER_MODULE, sizeof(*mc_led_info), GFP_KERNEL); - if (!mc_led_info) - return -ENOMEM; + if (!mc_led_info) { + ret = -ENOMEM; + goto child_out; + } fwnode_for_each_child_node(child, led_node) { ret = fwnode_property_read_u32(led_node, "color", diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c index e6fd47365b5887..68fbf0b66faddf 100644 --- a/drivers/leds/leds-netxbig.c +++ b/drivers/leds/leds-netxbig.c @@ -448,31 +448,39 @@ static int netxbig_leds_get_of_pdata(struct device *dev, gpio_ext = devm_kzalloc(dev, sizeof(*gpio_ext), GFP_KERNEL); if (!gpio_ext) { of_node_put(gpio_ext_np); - return -ENOMEM; + ret = -ENOMEM; + goto put_device; } ret = netxbig_gpio_ext_get(dev, gpio_ext_dev, gpio_ext); of_node_put(gpio_ext_np); if (ret) - return ret; + goto put_device; pdata->gpio_ext = gpio_ext; /* Timers (optional) */ ret = of_property_count_u32_elems(np, "timers"); if (ret > 0) { - if (ret % 3) - return -EINVAL; + if (ret % 3) { + ret = -EINVAL; + goto put_device; + } + num_timers = ret / 3; timers = devm_kcalloc(dev, num_timers, sizeof(*timers), GFP_KERNEL); - if (!timers) - return -ENOMEM; + if (!timers) { + ret = -ENOMEM; + goto put_device; + } for (i = 0; i < num_timers; i++) { u32 tmp; of_property_read_u32_index(np, "timers", 3 * i, &timers[i].mode); - if (timers[i].mode >= NETXBIG_LED_MODE_NUM) - return -EINVAL; + if (timers[i].mode >= NETXBIG_LED_MODE_NUM) { + ret = -EINVAL; + goto put_device; + } of_property_read_u32_index(np, "timers", 3 * i + 1, &tmp); timers[i].delay_on = tmp; @@ -488,12 +496,15 @@ static int netxbig_leds_get_of_pdata(struct device *dev, num_leds = of_get_available_child_count(np); 
if (!num_leds) { dev_err(dev, "No LED subnodes found in DT\n"); - return -ENODEV; + ret = -ENODEV; + goto put_device; } leds = devm_kcalloc(dev, num_leds, sizeof(*leds), GFP_KERNEL); - if (!leds) - return -ENOMEM; + if (!leds) { + ret = -ENOMEM; + goto put_device; + } led = leds; for_each_available_child_of_node(np, child) { @@ -574,6 +585,8 @@ static int netxbig_leds_get_of_pdata(struct device *dev, err_node_put: of_node_put(child); +put_device: + put_device(gpio_ext_dev); return ret; } diff --git a/drivers/leds/leds-turris-omnia.c b/drivers/leds/leds-turris-omnia.c index 8c5bdc3847ee73..880fc8def53090 100644 --- a/drivers/leds/leds-turris-omnia.c +++ b/drivers/leds/leds-turris-omnia.c @@ -98,9 +98,9 @@ static int omnia_led_register(struct i2c_client *client, struct omnia_led *led, } ret = of_property_read_u32(np, "color", &color); - if (ret || color != LED_COLOR_ID_MULTI) { + if (ret || color != LED_COLOR_ID_RGB) { dev_warn(dev, - "Node %pOF: must contain 'color' property with value LED_COLOR_ID_MULTI\n", + "Node %pOF: must contain 'color' property with value LED_COLOR_ID_RGB\n", np); return 0; } diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 8f39f9ba5c80e8..4c2ce210c1237d 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -19,6 +19,7 @@ if NVM config NVM_PBLK tristate "Physical Block Device Open-Channel SSD target" + select CRC32 help Allows an open-channel SSD to be exposed as a block device to the host. 
The target assumes the device exposes raw flash and must be diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index c1bcac71008c67..28ddcaa5358b14 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -844,11 +844,10 @@ static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa) rqd.ppa_addr = generic_to_dev_addr(dev, ppa); ret = nvm_submit_io_sync_raw(dev, &rqd); + __free_page(page); if (ret) return ret; - __free_page(page); - return rqd.error; } diff --git a/drivers/macintosh/adb-iop.c b/drivers/macintosh/adb-iop.c index f3d1a460fbce1d..0ee3272491501c 100644 --- a/drivers/macintosh/adb-iop.c +++ b/drivers/macintosh/adb-iop.c @@ -25,6 +25,7 @@ static struct adb_request *current_req; static struct adb_request *last_req; static unsigned int autopoll_devs; +static u8 autopoll_addr; static enum adb_iop_state { idle, @@ -41,6 +42,11 @@ static int adb_iop_autopoll(int); static void adb_iop_poll(void); static int adb_iop_reset_bus(void); +/* ADB command byte structure */ +#define ADDR_MASK 0xF0 +#define OP_MASK 0x0C +#define TALK 0x0C + struct adb_driver adb_iop_driver = { .name = "ISM IOP", .probe = adb_iop_probe, @@ -78,10 +84,7 @@ static void adb_iop_complete(struct iop_msg *msg) local_irq_save(flags); - if (current_req->reply_expected) - adb_iop_state = awaiting_reply; - else - adb_iop_done(); + adb_iop_state = awaiting_reply; local_irq_restore(flags); } @@ -89,38 +92,52 @@ static void adb_iop_complete(struct iop_msg *msg) /* * Listen for ADB messages from the IOP. * - * This will be called when unsolicited messages (usually replies to TALK - * commands or autopoll packets) are received. + * This will be called when unsolicited IOP messages are received. + * These IOP messages can carry ADB autopoll responses and also occur + * after explicit ADB commands. 
*/ static void adb_iop_listen(struct iop_msg *msg) { struct adb_iopmsg *amsg = (struct adb_iopmsg *)msg->message; + u8 addr = (amsg->cmd & ADDR_MASK) >> 4; + u8 op = amsg->cmd & OP_MASK; unsigned long flags; bool req_done = false; local_irq_save(flags); - /* Handle a timeout. Timeout packets seem to occur even after - * we've gotten a valid reply to a TALK, presumably because of - * autopolling. + /* Responses to Talk commands may be unsolicited as they are + * produced when the IOP polls devices. They are mostly timeouts. */ - - if (amsg->flags & ADB_IOP_EXPLICIT) { + if (op == TALK && ((1 << addr) & autopoll_devs)) + autopoll_addr = addr; + + switch (amsg->flags & (ADB_IOP_EXPLICIT | + ADB_IOP_AUTOPOLL | + ADB_IOP_TIMEOUT)) { + case ADB_IOP_EXPLICIT: + case ADB_IOP_EXPLICIT | ADB_IOP_TIMEOUT: if (adb_iop_state == awaiting_reply) { struct adb_request *req = current_req; - req->reply_len = amsg->count + 1; - memcpy(req->reply, &amsg->cmd, req->reply_len); + if (req->reply_expected) { + req->reply_len = amsg->count + 1; + memcpy(req->reply, &amsg->cmd, req->reply_len); + } req_done = true; } - } else if (!(amsg->flags & ADB_IOP_TIMEOUT)) { - adb_input(&amsg->cmd, amsg->count + 1, - amsg->flags & ADB_IOP_AUTOPOLL); + break; + case ADB_IOP_AUTOPOLL: + if (((1 << addr) & autopoll_devs) && + amsg->cmd == ADB_READREG(addr, 0)) + adb_input(&amsg->cmd, amsg->count + 1, 1); + break; } - - msg->reply[0] = autopoll_devs ? ADB_IOP_AUTOPOLL : 0; + msg->reply[0] = autopoll_addr ? ADB_IOP_AUTOPOLL : 0; + msg->reply[1] = 0; + msg->reply[2] = autopoll_addr ? ADB_READREG(autopoll_addr, 0) : 0; iop_complete_message(msg); if (req_done) @@ -233,6 +250,9 @@ static void adb_iop_set_ap_complete(struct iop_msg *msg) struct adb_iopmsg *amsg = (struct adb_iopmsg *)msg->message; autopoll_devs = (amsg->data[1] << 8) | amsg->data[0]; + if (autopoll_devs & (1 << autopoll_addr)) + return; + autopoll_addr = autopoll_devs ? 
(ffs(autopoll_devs) - 1) : 0; } static int adb_iop_autopoll(int devs) diff --git a/drivers/mailbox/arm_mhu_db.c b/drivers/mailbox/arm_mhu_db.c index 275efe4cca0c21..8eb66c4ecf5bf4 100644 --- a/drivers/mailbox/arm_mhu_db.c +++ b/drivers/mailbox/arm_mhu_db.c @@ -180,7 +180,7 @@ static void mhu_db_shutdown(struct mbox_chan *chan) /* Reset channel */ mhu_db_mbox_clear_irq(chan); - kfree(chan->con_priv); + devm_kfree(mbox->dev, chan->con_priv); chan->con_priv = NULL; } diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 30ba3573626c2f..2cefb075b2b846 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -585,6 +585,7 @@ config DM_INTEGRITY select BLK_DEV_INTEGRITY select DM_BUFIO select CRYPTO + select CRYPTO_SKCIPHER select ASYNC_XOR help This device-mapper target emulates a block device that has @@ -602,6 +603,7 @@ config DM_ZONED tristate "Drive-managed zoned block device target support" depends on BLK_DEV_DM depends on BLK_DEV_ZONED + select CRC32 help This device-mapper target takes a host-managed or host-aware zoned block device and exposes most of its capacity as a regular block diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c index 6469223f0b7771..d636b7b2d070c4 100644 --- a/drivers/md/bcache/features.c +++ b/drivers/md/bcache/features.c @@ -17,7 +17,7 @@ struct feature { }; static struct feature feature_list[] = { - {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LARGE_BUCKET, + {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE, "large_bucket"}, {0, 0, 0 }, }; diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index a1653c4780416c..d1c8fd3977fc64 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -13,11 +13,15 @@ /* Feature set definition */ /* Incompat feature set */ -#define BCH_FEATURE_INCOMPAT_LARGE_BUCKET 0x0001 /* 32bit bucket size */ +/* 32bit bucket size, obsoleted */ +#define BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET 0x0001 +/* real bucket size is (1 << 
bucket_size) */ +#define BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE 0x0002 -#define BCH_FEATURE_COMPAT_SUUP 0 -#define BCH_FEATURE_RO_COMPAT_SUUP 0 -#define BCH_FEATURE_INCOMPAT_SUUP BCH_FEATURE_INCOMPAT_LARGE_BUCKET +#define BCH_FEATURE_COMPAT_SUPP 0 +#define BCH_FEATURE_RO_COMPAT_SUPP 0 +#define BCH_FEATURE_INCOMPAT_SUPP (BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET| \ + BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE) #define BCH_HAS_COMPAT_FEATURE(sb, mask) \ ((sb)->feature_compat & (mask)) @@ -29,6 +33,8 @@ #define BCH_FEATURE_COMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_compat & \ BCH##_FEATURE_COMPAT_##flagname) != 0); \ } \ @@ -46,6 +52,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ #define BCH_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_ro_compat & \ BCH##_FEATURE_RO_COMPAT_##flagname) != 0); \ } \ @@ -63,6 +71,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ #define BCH_FEATURE_INCOMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_incompat & \ BCH##_FEATURE_INCOMPAT_##flagname) != 0); \ } \ @@ -77,7 +87,23 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ ~BCH##_FEATURE_INCOMPAT_##flagname; \ } -BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET); +BCH_FEATURE_INCOMPAT_FUNCS(obso_large_bucket, OBSO_LARGE_BUCKET); +BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LOG_LARGE_BUCKET_SIZE); + +static inline bool bch_has_unknown_compat_features(struct cache_sb *sb) +{ + return ((sb->feature_compat & ~BCH_FEATURE_COMPAT_SUPP) != 0); +} + +static inline bool 
bch_has_unknown_ro_compat_features(struct cache_sb *sb) +{ + return ((sb->feature_ro_compat & ~BCH_FEATURE_RO_COMPAT_SUPP) != 0); +} + +static inline bool bch_has_unknown_incompat_features(struct cache_sb *sb) +{ + return ((sb->feature_incompat & ~BCH_FEATURE_INCOMPAT_SUPP) != 0); +} int bch_print_cache_set_feature_compat(struct cache_set *c, char *buf, int size); int bch_print_cache_set_feature_ro_compat(struct cache_set *c, char *buf, int size); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 46a00134a36ae1..a148b92ad85636 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -64,9 +64,25 @@ static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s { unsigned int bucket_size = le16_to_cpu(s->bucket_size); - if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES && - bch_has_feature_large_bucket(sb)) - bucket_size |= le16_to_cpu(s->bucket_size_hi) << 16; + if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { + if (bch_has_feature_large_bucket(sb)) { + unsigned int max, order; + + max = sizeof(unsigned int) * BITS_PER_BYTE - 1; + order = le16_to_cpu(s->bucket_size); + /* + * bcache tool will make sure the overflow won't + * happen, an error message here is enough. 
+ */ + if (order > max) + pr_err("Bucket size (1 << %u) overflows\n", + order); + bucket_size = 1 << order; + } else if (bch_has_feature_obso_large_bucket(sb)) { + bucket_size += + le16_to_cpu(s->obso_bucket_size_hi) << 16; + } + } return bucket_size; } @@ -228,6 +244,20 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, sb->feature_compat = le64_to_cpu(s->feature_compat); sb->feature_incompat = le64_to_cpu(s->feature_incompat); sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat); + + /* Check incompatible features */ + err = "Unsupported compatible feature found"; + if (bch_has_unknown_compat_features(sb)) + goto err; + + err = "Unsupported read-only compatible feature found"; + if (bch_has_unknown_ro_compat_features(sb)) + goto err; + + err = "Unsupported incompatible feature found"; + if (bch_has_unknown_incompat_features(sb)) + goto err; + err = read_super_common(sb, bdev, s); if (err) goto err; @@ -1311,6 +1341,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, bcache_device_link(&dc->disk, c, "bdev"); atomic_inc(&c->attached_dev_nr); + if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) { + pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); + pr_err("Please update to the latest bcache-tools to create the cache device\n"); + set_disk_ro(dc->disk.disk, 1); + } + /* Allow the writeback thread to proceed */ up_write(&dc->writeback_lock); @@ -1534,6 +1570,12 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) bcache_device_link(d, c, "volume"); + if (bch_has_feature_obso_large_bucket(&c->cache->sb)) { + pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); + pr_err("Please update to the latest bcache-tools to create the cache device\n"); + set_disk_ro(d->disk, 1); + } + return 0; err: kobject_put(&d->kobj); @@ -2093,6 +2135,9 @@ static int run_cache_set(struct cache_set *c) 
c->cache->sb.last_mount = (u32)ktime_get_real_seconds(); bcache_write_super(c); + if (bch_has_feature_obso_large_bucket(&c->cache->sb)) + pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n"); + list_for_each_entry_safe(dc, t, &uncached_devices, list) bch_cached_dev_attach(dc, c, NULL); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c1a86bde658e4..fce4cbf9529d6c 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1534,6 +1534,12 @@ sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) } EXPORT_SYMBOL_GPL(dm_bufio_get_device_size); +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c) +{ + return c->dm_io; +} +EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client); + sector_t dm_bufio_get_block_number(struct dm_buffer *b) { return b->block; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 392337f16ecfd8..875823d6ee7e05 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1454,13 +1454,16 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, static void kcryptd_async_done(struct crypto_async_request *async_req, int error); -static void crypt_alloc_req_skcipher(struct crypt_config *cc, +static int crypt_alloc_req_skcipher(struct crypt_config *cc, struct convert_context *ctx) { unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); - if (!ctx->r.req) - ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO); + if (!ctx->r.req) { + ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? 
GFP_ATOMIC : GFP_NOIO); + if (!ctx->r.req) + return -ENOMEM; + } skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]); @@ -1471,13 +1474,18 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc, skcipher_request_set_callback(ctx->r.req, CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req)); + + return 0; } -static void crypt_alloc_req_aead(struct crypt_config *cc, +static int crypt_alloc_req_aead(struct crypt_config *cc, struct convert_context *ctx) { - if (!ctx->r.req_aead) - ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO); + if (!ctx->r.req_aead) { + ctx->r.req_aead = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); + if (!ctx->r.req_aead) + return -ENOMEM; + } aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]); @@ -1488,15 +1496,17 @@ static void crypt_alloc_req_aead(struct crypt_config *cc, aead_request_set_callback(ctx->r.req_aead, CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead)); + + return 0; } -static void crypt_alloc_req(struct crypt_config *cc, +static int crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { if (crypt_integrity_aead(cc)) - crypt_alloc_req_aead(cc, ctx); + return crypt_alloc_req_aead(cc, ctx); else - crypt_alloc_req_skcipher(cc, ctx); + return crypt_alloc_req_skcipher(cc, ctx); } static void crypt_free_req_skcipher(struct crypt_config *cc, @@ -1529,17 +1539,28 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_ * Encrypt / decrypt data from one bio to another one (can be the same one) */ static blk_status_t crypt_convert(struct crypt_config *cc, - struct convert_context *ctx, bool atomic) + struct convert_context *ctx, bool atomic, bool reset_pending) { unsigned int tag_offset = 0; unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT; int r; - atomic_set(&ctx->cc_pending, 1); + /* + * if reset_pending is set we are dealing with the bio for the first time, 
+ * else we're continuing to work on the previous bio, so don't mess with + * the cc_pending counter + */ + if (reset_pending) + atomic_set(&ctx->cc_pending, 1); while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { - crypt_alloc_req(cc, ctx); + r = crypt_alloc_req(cc, ctx); + if (r) { + complete(&ctx->restart); + return BLK_STS_DEV_RESOURCE; + } + atomic_inc(&ctx->cc_pending); if (crypt_integrity_aead(cc)) @@ -1553,7 +1574,25 @@ static blk_status_t crypt_convert(struct crypt_config *cc, * but the driver request queue is full, let's wait. */ case -EBUSY: - wait_for_completion(&ctx->restart); + if (in_interrupt()) { + if (try_wait_for_completion(&ctx->restart)) { + /* + * we don't have to block to wait for completion, + * so proceed + */ + } else { + /* + * we can't wait for completion without blocking + * exit and continue processing in a workqueue + */ + ctx->r.req = NULL; + ctx->cc_sector += sector_step; + tag_offset++; + return BLK_STS_DEV_RESOURCE; + } + } else { + wait_for_completion(&ctx->restart); + } reinit_completion(&ctx->restart); fallthrough; /* @@ -1691,6 +1730,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } +static void kcryptd_io_bio_endio(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + bio_endio(io->base_bio); +} + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1713,7 +1758,23 @@ static void crypt_dec_pending(struct dm_crypt_io *io) kfree(io->integrity_metadata); base_bio->bi_status = error; - bio_endio(base_bio); + + /* + * If we are running this function from our tasklet, + * we can't call bio_endio() here, because it will call + * clone_endio() from dm.c, which in turn will + * free the current struct dm_crypt_io structure with + * our tasklet. In this case we need to delay bio_endio() + * execution to after the tasklet is done and dequeued. 
+ */ + if (tasklet_trylock(&io->tasklet)) { + tasklet_unlock(&io->tasklet); + bio_endio(base_bio); + return; + } + + INIT_WORK(&io->work, kcryptd_io_bio_endio); + queue_work(cc->io_queue, &io->work); } /* @@ -1945,6 +2006,37 @@ static bool kcryptd_crypt_write_inline(struct crypt_config *cc, } } +static void kcryptd_crypt_write_continue(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + struct crypt_config *cc = io->cc; + struct convert_context *ctx = &io->ctx; + int crypt_finished; + sector_t sector = io->sector; + blk_status_t r; + + wait_for_completion(&ctx->restart); + reinit_completion(&ctx->restart); + + r = crypt_convert(cc, &io->ctx, true, false); + if (r) + io->error = r; + crypt_finished = atomic_dec_and_test(&ctx->cc_pending); + if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) { + /* Wait for completion signaled by kcryptd_async_done() */ + wait_for_completion(&ctx->restart); + crypt_finished = 1; + } + + /* Encryption was already finished, submit io now */ + if (crypt_finished) { + kcryptd_crypt_write_io_submit(io, 0); + io->sector = sector; + } + + crypt_dec_pending(io); +} + static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -1973,7 +2065,17 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_inc_pending(io); r = crypt_convert(cc, ctx, - test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)); + test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true); + /* + * Crypto API backlogged the request, because its queue was full + * and we're in softirq context, so continue from a workqueue + * (TODO: is it actually possible to be in softirq in the write path?) 
+ */ + if (r == BLK_STS_DEV_RESOURCE) { + INIT_WORK(&io->work, kcryptd_crypt_write_continue); + queue_work(cc->crypt_queue, &io->work); + return; + } if (r) io->error = r; crypt_finished = atomic_dec_and_test(&ctx->cc_pending); @@ -1998,6 +2100,25 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io) crypt_dec_pending(io); } +static void kcryptd_crypt_read_continue(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + struct crypt_config *cc = io->cc; + blk_status_t r; + + wait_for_completion(&io->ctx.restart); + reinit_completion(&io->ctx.restart); + + r = crypt_convert(cc, &io->ctx, true, false); + if (r) + io->error = r; + + if (atomic_dec_and_test(&io->ctx.cc_pending)) + kcryptd_crypt_read_done(io); + + crypt_dec_pending(io); +} + static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -2009,7 +2130,16 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) io->sector); r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)); + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + /* + * Crypto API backlogged the request, because its queue was full + * and we're in softirq context, so continue from a workqueue + */ + if (r == BLK_STS_DEV_RESOURCE) { + INIT_WORK(&io->work, kcryptd_crypt_read_continue); + queue_work(cc->crypt_queue, &io->work); + return; + } if (r) io->error = r; @@ -2091,8 +2221,12 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) || (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) { - if (in_irq()) { - /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */ + /* + * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context. 
+ * irqs_disabled(): the kernel may run some IO completion from the idle thread, but + * it is being executed with irqs disabled. + */ + if (in_irq() || irqs_disabled()) { tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); tasklet_schedule(&io->tasklet); return; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 5a7a1b90e671cf..b64fede032dc53 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -257,8 +257,9 @@ struct dm_integrity_c { bool journal_uptodate; bool just_formatted; bool recalculate_flag; - bool fix_padding; bool discard; + bool fix_padding; + bool legacy_recalculate; struct alg_spec internal_hash_alg; struct alg_spec journal_crypt_alg; @@ -386,6 +387,14 @@ static int dm_integrity_failed(struct dm_integrity_c *ic) return READ_ONCE(ic->failed); } +static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic) +{ + if ((ic->internal_hash_alg.key || ic->journal_mac_alg.key) && + !ic->legacy_recalculate) + return true; + return false; +} + static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i, unsigned j, unsigned char seq) { @@ -1379,12 +1388,52 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se #undef MAY_BE_HASH } -static void dm_integrity_flush_buffers(struct dm_integrity_c *ic) +struct flush_request { + struct dm_io_request io_req; + struct dm_io_region io_reg; + struct dm_integrity_c *ic; + struct completion comp; +}; + +static void flush_notify(unsigned long error, void *fr_) +{ + struct flush_request *fr = fr_; + if (unlikely(error != 0)) + dm_integrity_io_error(fr->ic, "flusing disk cache", -EIO); + complete(&fr->comp); +} + +static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data) { int r; + + struct flush_request fr; + + if (!ic->meta_dev) + flush_data = false; + if (flush_data) { + fr.io_req.bi_op = REQ_OP_WRITE, + fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC, + fr.io_req.mem.type = 
DM_IO_KMEM, + fr.io_req.mem.ptr.addr = NULL, + fr.io_req.notify.fn = flush_notify, + fr.io_req.notify.context = &fr; + fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio), + fr.io_reg.bdev = ic->dev->bdev, + fr.io_reg.sector = 0, + fr.io_reg.count = 0, + fr.ic = ic; + init_completion(&fr.comp); + r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); + BUG_ON(r); + } + r = dm_bufio_write_dirty_buffers(ic->bufio); if (unlikely(r)) dm_integrity_io_error(ic, "writing tags", r); + + if (flush_data) + wait_for_completion(&fr.comp); } static void sleep_on_endio_wait(struct dm_integrity_c *ic) @@ -2110,7 +2159,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) { integrity_metadata(&dio->work); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); dio->in_flight = (atomic_t)ATOMIC_INIT(1); dio->completion = NULL; @@ -2195,7 +2244,7 @@ static void integrity_commit(struct work_struct *w) flushes = bio_list_get(&ic->flush_bio_list); if (unlikely(ic->mode != 'J')) { spin_unlock_irq(&ic->endio_wait.lock); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); goto release_flush_bios; } @@ -2409,7 +2458,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, complete_journal_op(&comp); wait_for_completion_io(&comp.comp); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); } static void integrity_writer(struct work_struct *w) @@ -2451,7 +2500,7 @@ static void recalc_write_super(struct dm_integrity_c *ic) { int r; - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); if (dm_integrity_failed(ic)) return; @@ -2654,7 +2703,7 @@ static void bitmap_flush_work(struct work_struct *work) unsigned long limit; struct bio *bio; - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); range.logical_sector = 0; range.n_sectors = ic->provided_data_sectors; @@ -2663,9 +2712,7 
@@ static void bitmap_flush_work(struct work_struct *work) add_new_range_and_wait(ic, &range); spin_unlock_irq(&ic->endio_wait.lock); - dm_integrity_flush_buffers(ic); - if (ic->meta_dev) - blkdev_issue_flush(ic->dev->bdev, GFP_NOIO); + dm_integrity_flush_buffers(ic, true); limit = ic->provided_data_sectors; if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { @@ -2934,11 +2981,11 @@ static void dm_integrity_postsuspend(struct dm_target *ti) if (ic->meta_dev) queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); } if (ic->mode == 'B') { - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); #if 1 /* set to 0 to test bitmap replay code */ init_journal(ic, 0, ic->journal_sections, 0); @@ -3102,6 +3149,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, arg_count += !!ic->journal_crypt_alg.alg_string; arg_count += !!ic->journal_mac_alg.alg_string; arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0; + arg_count += ic->legacy_recalculate; DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start, ic->tag_size, ic->mode, arg_count); if (ic->meta_dev) @@ -3125,6 +3173,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, } if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) DMEMIT(" fix_padding"); + if (ic->legacy_recalculate) + DMEMIT(" legacy_recalculate"); #define EMIT_ALG(a, n) \ do { \ @@ -3754,7 +3804,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) unsigned extra_args; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 9, "Invalid number of feature args"}, + {0, 16, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; bool should_write_sb; @@ -3902,6 +3952,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) 
ic->discard = true; } else if (!strcmp(opt_string, "fix_padding")) { ic->fix_padding = true; + } else if (!strcmp(opt_string, "legacy_recalculate")) { + ic->legacy_recalculate = true; } else { r = -EINVAL; ti->error = "Invalid argument"; @@ -4197,6 +4249,20 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) r = -ENOMEM; goto bad; } + } else { + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { + ti->error = "Recalculate can only be specified with internal_hash"; + r = -EINVAL; + goto bad; + } + } + + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && + le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors && + dm_integrity_disable_recalculate(ic)) { + ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\""; + r = -EOPNOTSUPP; + goto bad; } ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index cd0478d44058b0..5e306bba437514 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1600,6 +1600,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para if (!argc) { DMWARN("Empty message received."); + r = -EINVAL; goto out_argv; } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index dc8568ab96f243..6dca932d6f1d1e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3730,14 +3730,12 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); /* - * RAID10 personality requires bio splitting, - * RAID0/1/4/5/6 don't and process large discard bios properly. + * RAID0 and RAID10 personalities require bio splitting, + * RAID1/4/5/6 don't and process large discard bios properly. 
*/ - if (rs_is_raid10(rs)) { - limits->discard_granularity = max(chunk_size_bytes, - limits->discard_granularity); - limits->max_discard_sectors = min_not_zero(rs->md.chunk_sectors, - limits->max_discard_sectors); + if (rs_is_raid0(rs) || rs_is_raid10(rs)) { + limits->discard_granularity = chunk_size_bytes; + limits->max_discard_sectors = rs->md.chunk_sectors; } } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 4668b2cd98f4e2..11890db71f3fe3 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -141,6 +141,11 @@ struct dm_snapshot { * for them to be committed. */ struct bio_list bios_queued_during_merge; + + /* + * Flush data after merge. + */ + struct bio flush_bio; }; /* @@ -1121,6 +1126,17 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s) static void error_bios(struct bio *bio); +static int flush_data(struct dm_snapshot *s) +{ + struct bio *flush_bio = &s->flush_bio; + + bio_reset(flush_bio); + bio_set_dev(flush_bio, s->origin->bdev); + flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + + return submit_bio_wait(flush_bio); +} + static void merge_callback(int read_err, unsigned long write_err, void *context) { struct dm_snapshot *s = context; @@ -1134,6 +1150,11 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) goto shut; } + if (flush_data(s) < 0) { + DMERR("Flush after merge failed: shutting down merge"); + goto shut; + } + if (s->store->type->commit_merge(s->store, s->num_merging_chunks) < 0) { DMERR("Write error in exception store: shutting down merge"); @@ -1318,6 +1339,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->first_merging_chunk = 0; s->num_merging_chunks = 0; bio_list_init(&s->bios_queued_during_merge); + bio_init(&s->flush_bio, NULL, 0); /* Allocate hash table for COW data */ if (init_hash_tables(s)) { @@ -1504,6 +1526,8 @@ static void snapshot_dtr(struct dm_target *ti) dm_exception_store_destroy(s->store); + bio_uninit(&s->flush_bio); + 
dm_put_device(ti, s->cow); dm_put_device(ti, s->origin); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7eeb7c4169c949..09ded08cbb609e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -370,14 +370,23 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, { int r; dev_t dev; + unsigned int major, minor; + char dummy; struct dm_dev_internal *dd; struct dm_table *t = ti->table; BUG_ON(!t); - dev = dm_get_dev_t(path); - if (!dev) - return -ENODEV; + if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) { + /* Extract the major/minor numbers */ + dev = MKDEV(major, minor); + if (MAJOR(dev) != major || MINOR(dev) != minor) + return -EOVERFLOW; + } else { + dev = dm_get_dev_t(path); + if (!dev) + return -ENODEV; + } dd = find_device(&t->devices, dev); if (!dd) { diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index f74982dcbea0dd..6b8e5bdd8526d9 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -537,6 +537,15 @@ static int verity_verify_io(struct dm_verity_io *io) return 0; } +/* + * Skip verity work in response to I/O error when system is shutting down. + */ +static inline bool verity_is_system_shutting_down(void) +{ + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF + || system_state == SYSTEM_RESTART; +} + /* * End one "io" structure with a given error. */ @@ -564,7 +573,8 @@ static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; - if (bio->bi_status && !verity_fec_is_enabled(io->v)) { + if (bio->bi_status && + (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) { verity_finish_io(io, bio->bi_status); return; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4e0cbfe3f14d43..1e99a4c1eca43d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -562,7 +562,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, * subset of the parent bdev; require extra privileges. 
*/ if (!capable(CAP_SYS_RAWIO)) { - DMWARN_LIMIT( + DMDEBUG_LIMIT( "%s: sending ioctl %x to DM device without required privilege.", current->comm, cmd); r = -ENOIOCTLCMD; diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 4aaf4820b6f625..f0e64e76fd7938 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -664,9 +664,27 @@ static void recv_daemon(struct md_thread *thread) * Takes the lock on the TOKEN lock resource so no other * node can communicate while the operation is underway. */ -static int lock_token(struct md_cluster_info *cinfo, bool mddev_locked) +static int lock_token(struct md_cluster_info *cinfo) { - int error, set_bit = 0; + int error; + + error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX); + if (error) { + pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n", + __func__, __LINE__, error); + } else { + /* Lock the receive sequence */ + mutex_lock(&cinfo->recv_mutex); + } + return error; +} + +/* lock_comm() + * Sets the MD_CLUSTER_SEND_LOCK bit to lock the send channel. 
+ */ +static int lock_comm(struct md_cluster_info *cinfo, bool mddev_locked) +{ + int rv, set_bit = 0; struct mddev *mddev = cinfo->mddev; /* @@ -677,34 +695,19 @@ static int lock_token(struct md_cluster_info *cinfo, bool mddev_locked) */ if (mddev_locked && !test_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state)) { - error = test_and_set_bit_lock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, + rv = test_and_set_bit_lock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state); - WARN_ON_ONCE(error); + WARN_ON_ONCE(rv); md_wakeup_thread(mddev->thread); set_bit = 1; } - error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX); - if (set_bit) - clear_bit_unlock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state); - if (error) - pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n", - __func__, __LINE__, error); - - /* Lock the receive sequence */ - mutex_lock(&cinfo->recv_mutex); - return error; -} - -/* lock_comm() - * Sets the MD_CLUSTER_SEND_LOCK bit to lock the send channel. - */ -static int lock_comm(struct md_cluster_info *cinfo, bool mddev_locked) -{ wait_event(cinfo->wait, !test_and_set_bit(MD_CLUSTER_SEND_LOCK, &cinfo->state)); - - return lock_token(cinfo, mddev_locked); + rv = lock_token(cinfo); + if (set_bit) + clear_bit_unlock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state); + return rv; } static void unlock_comm(struct md_cluster_info *cinfo) @@ -784,9 +787,11 @@ static int sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg, { int ret; - lock_comm(cinfo, mddev_locked); - ret = __sendmsg(cinfo, cmsg); - unlock_comm(cinfo); + ret = lock_comm(cinfo, mddev_locked); + if (!ret) { + ret = __sendmsg(cinfo, cmsg); + unlock_comm(cinfo); + } return ret; } @@ -1061,7 +1066,7 @@ static int metadata_update_start(struct mddev *mddev) return 0; } - ret = lock_token(cinfo, 1); + ret = lock_token(cinfo); clear_bit_unlock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state); return ret; } @@ -1255,7 +1260,10 @@ static void update_size(struct mddev *mddev, 
sector_t old_dev_sectors) int raid_slot = -1; md_update_sb(mddev, 1); - lock_comm(cinfo, 1); + if (lock_comm(cinfo, 1)) { + pr_err("%s: lock_comm failed\n", __func__); + return; + } memset(&cmsg, 0, sizeof(cmsg)); cmsg.type = cpu_to_le32(METADATA_UPDATED); @@ -1407,7 +1415,8 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev) cmsg.type = cpu_to_le32(NEWDISK); memcpy(cmsg.uuid, uuid, 16); cmsg.raid_slot = cpu_to_le32(rdev->desc_nr); - lock_comm(cinfo, 1); + if (lock_comm(cinfo, 1)) + return -EAGAIN; ret = __sendmsg(cinfo, &cmsg); if (ret) { unlock_comm(cinfo); diff --git a/drivers/md/md.c b/drivers/md/md.c index 0037c6ecab6501..3be74cf3635fe8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6948,8 +6948,10 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev) goto busy; kick_rdev: - if (mddev_is_clustered(mddev)) - md_cluster_ops->remove_disk(mddev, rdev); + if (mddev_is_clustered(mddev)) { + if (md_cluster_ops->remove_disk(mddev, rdev)) + goto busy; + } md_kick_rdev_from_array(rdev); set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); @@ -7278,6 +7280,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) return -EINVAL; if (mddev->sync_thread || test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) || mddev->reshape_position != MaxSector) return -EBUSY; @@ -7590,8 +7593,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto out; } - WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags)); - set_bit(MD_CLOSING, &mddev->flags); + if (test_and_set_bit(MD_CLOSING, &mddev->flags)) { + mutex_unlock(&mddev->open_mutex); + err = -EBUSY; + goto out; + } did_set_md_closing = true; mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); @@ -9642,8 +9648,11 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) } } - if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) - update_raid_disks(mddev, le32_to_cpu(sb->raid_disks)); + if 
(mddev->raid_disks != le32_to_cpu(sb->raid_disks)) { + ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks)); + if (ret) + pr_warn("md: updating array disks failed. %d\n", ret); + } /* * Since mddev->delta_disks has already updated in update_raid_disks, diff --git a/drivers/md/md.h b/drivers/md/md.h index bb645bc3ba6d6b..2175a5ac4f7c68 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -311,7 +311,7 @@ struct mddev { int external; /* metadata is * managed externally */ char metadata_type[17]; /* externally set*/ - unsigned int chunk_sectors; + int chunk_sectors; time64_t ctime, utime; int level, layout; char clevel[16]; @@ -339,7 +339,7 @@ struct mddev { */ sector_t reshape_position; int delta_disks, new_level, new_layout; - unsigned int new_chunk_sectors; + int new_chunk_sectors; int reshape_backwards; struct md_thread *thread; /* management thread */ diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3b598a3cb462af..9f9d8b67b5dd18 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1128,7 +1128,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, struct md_rdev *err_rdev = NULL; gfp_t gfp = GFP_NOIO; - if (r10_bio->devs[slot].rdev) { + if (slot >= 0 && r10_bio->devs[slot].rdev) { /* * This is an error retry, but we cannot * safely dereference the rdev in the r10_bio, @@ -1493,6 +1493,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) r10_bio->mddev = mddev; r10_bio->sector = bio->bi_iter.bi_sector; r10_bio->state = 0; + r10_bio->read_slot = -1; memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies); if (bio_data_dir(bio) == READ) diff --git a/drivers/media/cec/platform/Makefile b/drivers/media/cec/platform/Makefile index 3a947159b25ac9..ea6f8ee8161c96 100644 --- a/drivers/media/cec/platform/Makefile +++ b/drivers/media/cec/platform/Makefile @@ -10,5 +10,6 @@ obj-$(CONFIG_CEC_MESON_AO) += meson/ obj-$(CONFIG_CEC_SAMSUNG_S5P) += s5p/ obj-$(CONFIG_CEC_SECO) += seco/ 
obj-$(CONFIG_CEC_STI) += sti/ +obj-$(CONFIG_CEC_STM32) += stm32/ obj-$(CONFIG_CEC_TEGRA) += tegra/ diff --git a/drivers/media/common/siano/smsdvb-main.c b/drivers/media/common/siano/smsdvb-main.c index 88f90dfd368b10..ae17407e477a49 100644 --- a/drivers/media/common/siano/smsdvb-main.c +++ b/drivers/media/common/siano/smsdvb-main.c @@ -1169,12 +1169,15 @@ static int smsdvb_hotplug(struct smscore_device_t *coredev, rc = dvb_create_media_graph(&client->adapter, true); if (rc < 0) { pr_err("dvb_create_media_graph failed %d\n", rc); - goto client_error; + goto media_graph_error; } pr_info("DVB interface registered.\n"); return 0; +media_graph_error: + smsdvb_debugfs_release(client); + client_error: dvb_unregister_frontend(&client->frontend); diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c index 96d3b2b2aa3188..3f61f5863bf774 100644 --- a/drivers/media/common/videobuf2/videobuf2-v4l2.c +++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c @@ -118,8 +118,7 @@ static int __verify_length(struct vb2_buffer *vb, const struct v4l2_buffer *b) return -EINVAL; } } else { - length = (b->memory == VB2_MEMORY_USERPTR || - b->memory == VB2_MEMORY_DMABUF) + length = (b->memory == VB2_MEMORY_USERPTR) ? 
b->length : vb->planes[0].length; if (b->bytesused > length) diff --git a/drivers/media/i2c/imx214.c b/drivers/media/i2c/imx214.c index 1ef5af9a8c8bcd..cee1a4817af996 100644 --- a/drivers/media/i2c/imx214.c +++ b/drivers/media/i2c/imx214.c @@ -786,7 +786,7 @@ static int imx214_s_stream(struct v4l2_subdev *subdev, int enable) if (ret < 0) goto err_rpm_put; } else { - ret = imx214_start_streaming(imx214); + ret = imx214_stop_streaming(imx214); if (ret < 0) goto err_rpm_put; pm_runtime_put(imx214->dev); diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c index 1cee45e3535547..0ae66091a69624 100644 --- a/drivers/media/i2c/imx219.c +++ b/drivers/media/i2c/imx219.c @@ -473,8 +473,8 @@ static const struct imx219_mode supported_modes[] = { .width = 3280, .height = 2464, .crop = { - .left = 0, - .top = 0, + .left = IMX219_PIXEL_ARRAY_LEFT, + .top = IMX219_PIXEL_ARRAY_TOP, .width = 3280, .height = 2464 }, @@ -489,8 +489,8 @@ static const struct imx219_mode supported_modes[] = { .width = 1920, .height = 1080, .crop = { - .left = 680, - .top = 692, + .left = 688, + .top = 700, .width = 1920, .height = 1080 }, @@ -505,8 +505,8 @@ static const struct imx219_mode supported_modes[] = { .width = 1640, .height = 1232, .crop = { - .left = 0, - .top = 0, + .left = IMX219_PIXEL_ARRAY_LEFT, + .top = IMX219_PIXEL_ARRAY_TOP, .width = 3280, .height = 2464 }, @@ -521,8 +521,8 @@ static const struct imx219_mode supported_modes[] = { .width = 640, .height = 480, .crop = { - .left = 1000, - .top = 752, + .left = 1008, + .top = 760, .width = 1280, .height = 960 }, @@ -1008,6 +1008,7 @@ static int imx219_get_selection(struct v4l2_subdev *sd, return 0; case V4L2_SEL_TGT_CROP_DEFAULT: + case V4L2_SEL_TGT_CROP_BOUNDS: sel->r.top = IMX219_PIXEL_ARRAY_TOP; sel->r.left = IMX219_PIXEL_ARRAY_LEFT; sel->r.width = IMX219_PIXEL_ARRAY_WIDTH; diff --git a/drivers/media/i2c/max2175.c b/drivers/media/i2c/max2175.c index 03b4ed3a61b837..661208c9bfc5d7 100644 --- a/drivers/media/i2c/max2175.c 
+++ b/drivers/media/i2c/max2175.c @@ -503,7 +503,7 @@ static void max2175_set_bbfilter(struct max2175 *ctx) } } -static bool max2175_set_csm_mode(struct max2175 *ctx, +static int max2175_set_csm_mode(struct max2175 *ctx, enum max2175_csm_mode new_mode) { int ret = max2175_poll_csm_ready(ctx); diff --git a/drivers/media/i2c/max9271.c b/drivers/media/i2c/max9271.c index 0f6f7a092a463a..c247db569bab08 100644 --- a/drivers/media/i2c/max9271.c +++ b/drivers/media/i2c/max9271.c @@ -223,12 +223,12 @@ int max9271_enable_gpios(struct max9271_device *dev, u8 gpio_mask) { int ret; - ret = max9271_read(dev, 0x0f); + ret = max9271_read(dev, 0x0e); if (ret < 0) return 0; /* BIT(0) reserved: GPO is always enabled. */ - ret |= gpio_mask | BIT(0); + ret |= (gpio_mask & ~BIT(0)); ret = max9271_write(dev, 0x0e, ret); if (ret < 0) { dev_err(&dev->client->dev, "Failed to enable gpio (%d)\n", ret); @@ -245,12 +245,12 @@ int max9271_disable_gpios(struct max9271_device *dev, u8 gpio_mask) { int ret; - ret = max9271_read(dev, 0x0f); + ret = max9271_read(dev, 0x0e); if (ret < 0) return 0; /* BIT(0) reserved: GPO cannot be disabled */ - ret &= (~gpio_mask | BIT(0)); + ret &= ~(gpio_mask | BIT(0)); ret = max9271_write(dev, 0x0e, ret); if (ret < 0) { dev_err(&dev->client->dev, "Failed to disable gpio (%d)\n", ret); diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c index 8d0254d0e5ea7a..8f0812e8590128 100644 --- a/drivers/media/i2c/ov5640.c +++ b/drivers/media/i2c/ov5640.c @@ -1216,20 +1216,6 @@ static int ov5640_set_autogain(struct ov5640_dev *sensor, bool on) BIT(1), on ? 0 : BIT(1)); } -static int ov5640_set_stream_bt656(struct ov5640_dev *sensor, bool on) -{ - int ret; - - ret = ov5640_write_reg(sensor, OV5640_REG_CCIR656_CTRL00, - on ? 0x1 : 0x00); - if (ret) - return ret; - - return ov5640_write_reg(sensor, OV5640_REG_SYS_CTRL0, on ? 
- OV5640_REG_SYS_CTRL0_SW_PWUP : - OV5640_REG_SYS_CTRL0_SW_PWDN); -} - static int ov5640_set_stream_dvp(struct ov5640_dev *sensor, bool on) { return ov5640_write_reg(sensor, OV5640_REG_SYS_CTRL0, on ? @@ -1994,13 +1980,13 @@ static int ov5640_set_power_mipi(struct ov5640_dev *sensor, bool on) static int ov5640_set_power_dvp(struct ov5640_dev *sensor, bool on) { unsigned int flags = sensor->ep.bus.parallel.flags; - u8 pclk_pol = 0; - u8 hsync_pol = 0; - u8 vsync_pol = 0; + bool bt656 = sensor->ep.bus_type == V4L2_MBUS_BT656; + u8 polarities = 0; int ret; if (!on) { /* Reset settings to their default values. */ + ov5640_write_reg(sensor, OV5640_REG_CCIR656_CTRL00, 0x00); ov5640_write_reg(sensor, OV5640_REG_IO_MIPI_CTRL00, 0x58); ov5640_write_reg(sensor, OV5640_REG_POLARITY_CTRL00, 0x20); ov5640_write_reg(sensor, OV5640_REG_PAD_OUTPUT_ENABLE01, 0x00); @@ -2024,7 +2010,35 @@ static int ov5640_set_power_dvp(struct ov5640_dev *sensor, bool on) * - VSYNC: active high * - HREF: active low * - PCLK: active low + * + * VSYNC & HREF are not configured if BT656 bus mode is selected */ + + /* + * BT656 embedded synchronization configuration + * + * CCIR656 CTRL00 + * - [7]: SYNC code selection (0: auto generate sync code, + * 1: sync code from regs 0x4732-0x4735) + * - [6]: f value in CCIR656 SYNC code when fixed f value + * - [5]: Fixed f value + * - [4:3]: Blank toggle data options (00: data=1'h040/1'h200, + * 01: data from regs 0x4736-0x4738, 10: always keep 0) + * - [1]: Clip data disable + * - [0]: CCIR656 mode enable + * + * Default CCIR656 SAV/EAV mode with default codes + * SAV=0xff000080 & EAV=0xff00009d is enabled here with settings: + * - CCIR656 mode enable + * - auto generation of sync codes + * - blank toggle data 1'h040/1'h200 + * - clip reserved data (0x00 & 0xff changed to 0x01 & 0xfe) + */ + ret = ov5640_write_reg(sensor, OV5640_REG_CCIR656_CTRL00, + bt656 ? 
0x01 : 0x00); + if (ret) + return ret; + /* * configure parallel port control lines polarity * @@ -2035,29 +2049,26 @@ static int ov5640_set_power_dvp(struct ov5640_dev *sensor, bool on) * datasheet and hardware, 0 is active high * and 1 is active low...) */ - if (sensor->ep.bus_type == V4L2_MBUS_PARALLEL) { - if (flags & V4L2_MBUS_PCLK_SAMPLE_RISING) - pclk_pol = 1; + if (!bt656) { if (flags & V4L2_MBUS_HSYNC_ACTIVE_HIGH) - hsync_pol = 1; + polarities |= BIT(1); if (flags & V4L2_MBUS_VSYNC_ACTIVE_LOW) - vsync_pol = 1; - - ret = ov5640_write_reg(sensor, OV5640_REG_POLARITY_CTRL00, - (pclk_pol << 5) | (hsync_pol << 1) | - vsync_pol); - - if (ret) - return ret; + polarities |= BIT(0); } + if (flags & V4L2_MBUS_PCLK_SAMPLE_RISING) + polarities |= BIT(5); + + ret = ov5640_write_reg(sensor, OV5640_REG_POLARITY_CTRL00, polarities); + if (ret) + return ret; /* - * powerdown MIPI TX/RX PHY & disable MIPI + * powerdown MIPI TX/RX PHY & enable DVP * * MIPI CONTROL 00 - * 4: PWDN PHY TX - * 3: PWDN PHY RX - * 2: MIPI enable + * [4] = 1 : Power down MIPI HS Tx + * [3] = 1 : Power down MIPI LS Rx + * [2] = 0 : DVP enable (MIPI disable) */ ret = ov5640_write_reg(sensor, OV5640_REG_IO_MIPI_CTRL00, 0x18); if (ret) @@ -2074,8 +2085,7 @@ static int ov5640_set_power_dvp(struct ov5640_dev *sensor, bool on) * - [3:0]: D[9:6] output enable */ ret = ov5640_write_reg(sensor, OV5640_REG_PAD_OUTPUT_ENABLE01, - sensor->ep.bus_type == V4L2_MBUS_PARALLEL ? - 0x7f : 0x1f); + bt656 ? 
0x1f : 0x7f); if (ret) return ret; @@ -2925,8 +2935,6 @@ static int ov5640_s_stream(struct v4l2_subdev *sd, int enable) if (sensor->ep.bus_type == V4L2_MBUS_CSI2_DPHY) ret = ov5640_set_stream_mipi(sensor, enable); - else if (sensor->ep.bus_type == V4L2_MBUS_BT656) - ret = ov5640_set_stream_bt656(sensor, enable); else ret = ov5640_set_stream_dvp(sensor, enable); diff --git a/drivers/media/i2c/rdacm20.c b/drivers/media/i2c/rdacm20.c index 1ed928c4ca70fc..16bcb764b0e0d1 100644 --- a/drivers/media/i2c/rdacm20.c +++ b/drivers/media/i2c/rdacm20.c @@ -487,9 +487,18 @@ static int rdacm20_initialize(struct rdacm20_device *dev) * Reset the sensor by cycling the OV10635 reset signal connected to the * MAX9271 GPIO1 and verify communication with the OV10635. */ - max9271_clear_gpios(dev->serializer, MAX9271_GPIO1OUT); + ret = max9271_enable_gpios(dev->serializer, MAX9271_GPIO1OUT); + if (ret) + return ret; + + ret = max9271_clear_gpios(dev->serializer, MAX9271_GPIO1OUT); + if (ret) + return ret; usleep_range(10000, 15000); - max9271_set_gpios(dev->serializer, MAX9271_GPIO1OUT); + + ret = max9271_set_gpios(dev->serializer, MAX9271_GPIO1OUT); + if (ret) + return ret; usleep_range(10000, 15000); again: diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 7d9401219a3ac6..3b3221fd3fe8fa 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -2082,6 +2082,7 @@ static int tvp5150_parse_dt(struct tvp5150 *decoder, struct device_node *np) ep_np = of_graph_get_endpoint_by_regs(np, TVP5150_PAD_VID_OUT, 0); if (!ep_np) { + ret = -EINVAL; dev_err(dev, "Error no output endpoint available\n"); goto err_free; } diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2.c b/drivers/media/pci/intel/ipu3/ipu3-cio2.c index 4e598e937dfe2b..1fcd131482e0eb 100644 --- a/drivers/media/pci/intel/ipu3/ipu3-cio2.c +++ b/drivers/media/pci/intel/ipu3/ipu3-cio2.c @@ -791,6 +791,7 @@ static void cio2_vb2_return_all_buffers(struct cio2_queue *q, 
atomic_dec(&q->bufs_queued); vb2_buffer_done(&q->bufs[i]->vbb.vb2_buf, state); + q->bufs[i] = NULL; } } } @@ -1232,29 +1233,15 @@ static int cio2_subdev_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_format *fmt) { struct cio2_queue *q = container_of(sd, struct cio2_queue, subdev); - struct v4l2_subdev_format format; - int ret; - - if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) { - fmt->format = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad); - return 0; - } - if (fmt->pad == CIO2_PAD_SINK) { - format.which = V4L2_SUBDEV_FORMAT_ACTIVE; - ret = v4l2_subdev_call(sd, pad, get_fmt, NULL, - &format); + mutex_lock(&q->subdev_lock); - if (ret) - return ret; - /* update colorspace etc */ - q->subdev_fmt.colorspace = format.format.colorspace; - q->subdev_fmt.ycbcr_enc = format.format.ycbcr_enc; - q->subdev_fmt.quantization = format.format.quantization; - q->subdev_fmt.xfer_func = format.format.xfer_func; - } + if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) + fmt->format = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad); + else + fmt->format = q->subdev_fmt; - fmt->format = q->subdev_fmt; + mutex_unlock(&q->subdev_lock); return 0; } @@ -1271,6 +1258,9 @@ static int cio2_subdev_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_format *fmt) { struct cio2_queue *q = container_of(sd, struct cio2_queue, subdev); + struct v4l2_mbus_framefmt *mbus; + u32 mbus_code = fmt->format.code; + unsigned int i; /* * Only allow setting sink pad format; @@ -1279,16 +1269,29 @@ static int cio2_subdev_set_fmt(struct v4l2_subdev *sd, if (fmt->pad == CIO2_PAD_SOURCE) return cio2_subdev_get_fmt(sd, cfg, fmt); - if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) { - *v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format; - } else { - /* It's the sink, allow changing frame size */ - q->subdev_fmt.width = fmt->format.width; - q->subdev_fmt.height = fmt->format.height; - q->subdev_fmt.code = fmt->format.code; - fmt->format = q->subdev_fmt; + if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) + mbus = 
v4l2_subdev_get_try_format(sd, cfg, fmt->pad); + else + mbus = &q->subdev_fmt; + + fmt->format.code = formats[0].mbus_code; + + for (i = 0; i < ARRAY_SIZE(formats); i++) { + if (formats[i].mbus_code == fmt->format.code) { + fmt->format.code = mbus_code; + break; + } } + fmt->format.width = min_t(u32, fmt->format.width, CIO2_IMAGE_MAX_WIDTH); + fmt->format.height = min_t(u32, fmt->format.height, + CIO2_IMAGE_MAX_LENGTH); + fmt->format.field = V4L2_FIELD_NONE; + + mutex_lock(&q->subdev_lock); + *mbus = fmt->format; + mutex_unlock(&q->subdev_lock); + return 0; } @@ -1547,6 +1550,7 @@ static int cio2_queue_init(struct cio2_device *cio2, struct cio2_queue *q) /* Initialize miscellaneous variables */ mutex_init(&q->lock); + mutex_init(&q->subdev_lock); /* Initialize formats to default values */ fmt = &q->subdev_fmt; @@ -1663,6 +1667,7 @@ static int cio2_queue_init(struct cio2_device *cio2, struct cio2_queue *q) fail_subdev_media_entity: cio2_fbpt_exit(q, &cio2->pci_dev->dev); fail_fbpt: + mutex_destroy(&q->subdev_lock); mutex_destroy(&q->lock); return r; @@ -1675,6 +1680,7 @@ static void cio2_queue_exit(struct cio2_device *cio2, struct cio2_queue *q) v4l2_device_unregister_subdev(&q->subdev); media_entity_cleanup(&q->subdev.entity); cio2_fbpt_exit(q, &cio2->pci_dev->dev); + mutex_destroy(&q->subdev_lock); mutex_destroy(&q->lock); } diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2.h b/drivers/media/pci/intel/ipu3/ipu3-cio2.h index 549b08f88f0c7a..146492383aa5b7 100644 --- a/drivers/media/pci/intel/ipu3/ipu3-cio2.h +++ b/drivers/media/pci/intel/ipu3/ipu3-cio2.h @@ -335,6 +335,7 @@ struct cio2_queue { /* Subdev, /dev/v4l-subdevX */ struct v4l2_subdev subdev; + struct mutex subdev_lock; /* Serialise acces to subdev_fmt field */ struct media_pad subdev_pads[CIO2_PADS]; struct v4l2_mbus_framefmt subdev_fmt; atomic_t frame_sequence; diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_spi.c b/drivers/media/pci/netup_unidvb/netup_unidvb_spi.c index 
d4f12c250f91ad..526042d8afae53 100644 --- a/drivers/media/pci/netup_unidvb/netup_unidvb_spi.c +++ b/drivers/media/pci/netup_unidvb/netup_unidvb_spi.c @@ -175,7 +175,7 @@ int netup_spi_init(struct netup_unidvb_dev *ndev) struct spi_master *master; struct netup_spi *nspi; - master = spi_alloc_master(&ndev->pci_dev->dev, + master = devm_spi_alloc_master(&ndev->pci_dev->dev, sizeof(struct netup_spi)); if (!master) { dev_err(&ndev->pci_dev->dev, @@ -208,6 +208,7 @@ int netup_spi_init(struct netup_unidvb_dev *ndev) ndev->pci_slot, ndev->pci_func); if (!spi_new_device(master, &netup_spi_board)) { + spi_unregister_master(master); ndev->spi = NULL; dev_err(&ndev->pci_dev->dev, "%s(): unable to create SPI device\n", __func__); @@ -226,13 +227,13 @@ void netup_spi_release(struct netup_unidvb_dev *ndev) if (!spi) return; + spi_unregister_master(spi->master); spin_lock_irqsave(&spi->lock, flags); reg = readw(&spi->regs->control_stat); writew(reg | NETUP_SPI_CTRL_IRQ, &spi->regs->control_stat); reg = readw(&spi->regs->control_stat); writew(reg & ~NETUP_SPI_CTRL_IMASK, &spi->regs->control_stat); spin_unlock_irqrestore(&spi->lock, flags); - spi_unregister_master(spi->master); ndev->spi = NULL; } diff --git a/drivers/media/pci/saa7146/mxb.c b/drivers/media/pci/saa7146/mxb.c index 129a1f8ebe1adc..73fc901ecf3db5 100644 --- a/drivers/media/pci/saa7146/mxb.c +++ b/drivers/media/pci/saa7146/mxb.c @@ -641,16 +641,17 @@ static int vidioc_s_audio(struct file *file, void *fh, const struct v4l2_audio * struct mxb *mxb = (struct mxb *)dev->ext_priv; DEB_D("VIDIOC_S_AUDIO %d\n", a->index); - if (mxb_inputs[mxb->cur_input].audioset & (1 << a->index)) { - if (mxb->cur_audinput != a->index) { - mxb->cur_audinput = a->index; - tea6420_route(mxb, a->index); - if (mxb->cur_audinput == 0) - mxb_update_audmode(mxb); - } - return 0; + if (a->index >= 32 || + !(mxb_inputs[mxb->cur_input].audioset & (1 << a->index))) + return -EINVAL; + + if (mxb->cur_audinput != a->index) { + mxb->cur_audinput = 
a->index; + tea6420_route(mxb, a->index); + if (mxb->cur_audinput == 0) + mxb_update_audmode(mxb); } - return -EINVAL; + return 0; } #ifdef CONFIG_VIDEO_ADV_DEBUG diff --git a/drivers/media/pci/solo6x10/solo6x10-g723.c b/drivers/media/pci/solo6x10/solo6x10-g723.c index 906ce86437ae38..d137b94869d82a 100644 --- a/drivers/media/pci/solo6x10/solo6x10-g723.c +++ b/drivers/media/pci/solo6x10/solo6x10-g723.c @@ -385,7 +385,7 @@ int solo_g723_init(struct solo_dev *solo_dev) ret = snd_ctl_add(card, snd_ctl_new1(&kctl, solo_dev)); if (ret < 0) - return ret; + goto snd_error; ret = solo_snd_pcm_init(solo_dev); if (ret < 0) diff --git a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c index 227245ccaedc79..88a23bce569d95 100644 --- a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c +++ b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c @@ -1306,6 +1306,7 @@ static int mtk_jpeg_clk_init(struct mtk_jpeg_dev *jpeg) jpeg->variant->clks); if (ret) { dev_err(&pdev->dev, "failed to get jpeg clock:%d\n", ret); + put_device(&pdev->dev); return ret; } @@ -1331,6 +1332,12 @@ static void mtk_jpeg_job_timeout_work(struct work_struct *work) v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR); v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx); } + +static inline void mtk_jpeg_clk_release(struct mtk_jpeg_dev *jpeg) +{ + put_device(jpeg->larb); +} + static int mtk_jpeg_probe(struct platform_device *pdev) { struct mtk_jpeg_dev *jpeg; @@ -1435,6 +1442,7 @@ static int mtk_jpeg_probe(struct platform_device *pdev) v4l2_device_unregister(&jpeg->v4l2_dev); err_dev_register: + mtk_jpeg_clk_release(jpeg); err_clk_init: @@ -1452,6 +1460,7 @@ static int mtk_jpeg_remove(struct platform_device *pdev) video_device_release(jpeg->vdev); v4l2_m2m_release(jpeg->m2m_dev); v4l2_device_unregister(&jpeg->v4l2_dev); + mtk_jpeg_clk_release(jpeg); return 0; } diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c 
b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c index 36dfe3fc056a4c..ddee7046ce4225 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c @@ -47,11 +47,14 @@ int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *mtkdev) dec_clk->clk_info = devm_kcalloc(&pdev->dev, dec_clk->clk_num, sizeof(*clk_info), GFP_KERNEL); - if (!dec_clk->clk_info) - return -ENOMEM; + if (!dec_clk->clk_info) { + ret = -ENOMEM; + goto put_device; + } } else { mtk_v4l2_err("Failed to get vdec clock count"); - return -EINVAL; + ret = -EINVAL; + goto put_device; } for (i = 0; i < dec_clk->clk_num; i++) { @@ -60,25 +63,29 @@ int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *mtkdev) "clock-names", i, &clk_info->clk_name); if (ret) { mtk_v4l2_err("Failed to get clock name id = %d", i); - return ret; + goto put_device; } clk_info->vcodec_clk = devm_clk_get(&pdev->dev, clk_info->clk_name); if (IS_ERR(clk_info->vcodec_clk)) { mtk_v4l2_err("devm_clk_get (%d)%s fail", i, clk_info->clk_name); - return PTR_ERR(clk_info->vcodec_clk); + ret = PTR_ERR(clk_info->vcodec_clk); + goto put_device; } } pm_runtime_enable(&pdev->dev); - + return 0; +put_device: + put_device(pm->larbvdec); return ret; } void mtk_vcodec_release_dec_pm(struct mtk_vcodec_dev *dev) { pm_runtime_disable(dev->pm.dev); + put_device(dev->pm.larbvdec); } void mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c index ee22902aaa71cf..1a047c25679fa3 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_pm.c @@ -47,14 +47,16 @@ int mtk_vcodec_init_enc_pm(struct mtk_vcodec_dev *mtkdev) node = of_parse_phandle(dev->of_node, "mediatek,larb", 1); if (!node) { mtk_v4l2_err("no mediatek,larb found"); - return -ENODEV; + ret = -ENODEV; + goto put_larbvenc; } pdev = of_find_device_by_node(node); 
of_node_put(node); if (!pdev) { mtk_v4l2_err("no mediatek,larb device found"); - return -ENODEV; + ret = -ENODEV; + goto put_larbvenc; } pm->larbvenclt = &pdev->dev; @@ -67,11 +69,14 @@ int mtk_vcodec_init_enc_pm(struct mtk_vcodec_dev *mtkdev) enc_clk->clk_info = devm_kcalloc(&pdev->dev, enc_clk->clk_num, sizeof(*clk_info), GFP_KERNEL); - if (!enc_clk->clk_info) - return -ENOMEM; + if (!enc_clk->clk_info) { + ret = -ENOMEM; + goto put_larbvenclt; + } } else { mtk_v4l2_err("Failed to get venc clock count"); - return -EINVAL; + ret = -EINVAL; + goto put_larbvenclt; } for (i = 0; i < enc_clk->clk_num; i++) { @@ -80,17 +85,24 @@ int mtk_vcodec_init_enc_pm(struct mtk_vcodec_dev *mtkdev) "clock-names", i, &clk_info->clk_name); if (ret) { mtk_v4l2_err("venc failed to get clk name %d", i); - return ret; + goto put_larbvenclt; } clk_info->vcodec_clk = devm_clk_get(&pdev->dev, clk_info->clk_name); if (IS_ERR(clk_info->vcodec_clk)) { mtk_v4l2_err("venc devm_clk_get (%d)%s fail", i, clk_info->clk_name); - return PTR_ERR(clk_info->vcodec_clk); + ret = PTR_ERR(clk_info->vcodec_clk); + goto put_larbvenclt; } } + return 0; + +put_larbvenclt: + put_device(pm->larbvenclt); +put_larbvenc: + put_device(pm->larbvenc); return ret; } diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c index 6103aaf43987b0..d5bfd6fff85b41 100644 --- a/drivers/media/platform/qcom/venus/core.c +++ b/drivers/media/platform/qcom/venus/core.c @@ -355,12 +355,26 @@ static __maybe_unused int venus_runtime_suspend(struct device *dev) if (ret) return ret; + if (pm_ops->core_power) { + ret = pm_ops->core_power(dev, POWER_OFF); + if (ret) + return ret; + } + ret = icc_set_bw(core->cpucfg_path, 0, 0); if (ret) - return ret; + goto err_cpucfg_path; - if (pm_ops->core_power) - ret = pm_ops->core_power(dev, POWER_OFF); + ret = icc_set_bw(core->video_path, 0, 0); + if (ret) + goto err_video_path; + + return ret; + +err_video_path: + icc_set_bw(core->cpucfg_path, 
kbps_to_icc(1000), 0); +err_cpucfg_path: + pm_ops->core_power(dev, POWER_ON); return ret; } @@ -371,16 +385,20 @@ static __maybe_unused int venus_runtime_resume(struct device *dev) const struct venus_pm_ops *pm_ops = core->pm_ops; int ret; + ret = icc_set_bw(core->video_path, kbps_to_icc(20000), 0); + if (ret) + return ret; + + ret = icc_set_bw(core->cpucfg_path, kbps_to_icc(1000), 0); + if (ret) + return ret; + if (pm_ops->core_power) { ret = pm_ops->core_power(dev, POWER_ON); if (ret) return ret; } - ret = icc_set_bw(core->cpucfg_path, 0, kbps_to_icc(1000)); - if (ret) - return ret; - return hfi_core_resume(core, false); } diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c b/drivers/media/platform/qcom/venus/pm_helpers.c index a9538c2cc3c9dc..2946547a0df4a4 100644 --- a/drivers/media/platform/qcom/venus/pm_helpers.c +++ b/drivers/media/platform/qcom/venus/pm_helpers.c @@ -212,6 +212,16 @@ static int load_scale_bw(struct venus_core *core) } mutex_unlock(&core->lock); + /* + * keep minimum bandwidth vote for "video-mem" path, + * so that clks can be disabled during vdec_session_release(). + * Actual bandwidth drop will be done during device supend + * so that device can power down without any warnings. 
+ */ + + if (!total_avg && !total_peak) + total_avg = kbps_to_icc(1000); + dev_dbg(core->dev, VDBGL "total: avg_bw: %u, peak_bw: %u\n", total_avg, total_peak); diff --git a/drivers/media/rc/ir-mce_kbd-decoder.c b/drivers/media/rc/ir-mce_kbd-decoder.c index be8f2756a444e7..1524dc0fc566e2 100644 --- a/drivers/media/rc/ir-mce_kbd-decoder.c +++ b/drivers/media/rc/ir-mce_kbd-decoder.c @@ -320,7 +320,7 @@ static int ir_mce_kbd_decode(struct rc_dev *dev, struct ir_raw_event ev) data->body); spin_lock(&data->keylock); if (scancode) { - delay = nsecs_to_jiffies(dev->timeout) + + delay = usecs_to_jiffies(dev->timeout) + msecs_to_jiffies(100); mod_timer(&data->rx_timeout, jiffies + delay); } else { diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c index a905113fef6eae..0c6229592e132d 100644 --- a/drivers/media/rc/ite-cir.c +++ b/drivers/media/rc/ite-cir.c @@ -1551,7 +1551,7 @@ static int ite_probe(struct pnp_dev *pdev, const struct pnp_device_id rdev->s_rx_carrier_range = ite_set_rx_carrier_range; /* FIFO threshold is 17 bytes, so 17 * 8 samples minimum */ rdev->min_timeout = 17 * 8 * ITE_BAUDRATE_DIVISOR * - itdev->params.sample_period; + itdev->params.sample_period / 1000; rdev->timeout = IR_DEFAULT_TIMEOUT; rdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT; rdev->rx_resolution = ITE_BAUDRATE_DIVISOR * diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 1d811e5ffb557f..1fd62c1dac768d 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -737,7 +737,7 @@ static unsigned int repeat_period(int protocol) void rc_repeat(struct rc_dev *dev) { unsigned long flags; - unsigned int timeout = nsecs_to_jiffies(dev->timeout) + + unsigned int timeout = usecs_to_jiffies(dev->timeout) + msecs_to_jiffies(repeat_period(dev->last_protocol)); struct lirc_scancode sc = { .scancode = dev->last_scancode, .rc_proto = dev->last_protocol, @@ -855,7 +855,7 @@ void rc_keydown(struct rc_dev *dev, enum rc_proto protocol, u64 scancode, 
ir_do_keydown(dev, protocol, scancode, keycode, toggle); if (dev->keypressed) { - dev->keyup_jiffies = jiffies + nsecs_to_jiffies(dev->timeout) + + dev->keyup_jiffies = jiffies + usecs_to_jiffies(dev->timeout) + msecs_to_jiffies(repeat_period(protocol)); mod_timer(&dev->timer_keyup, dev->keyup_jiffies); } @@ -1928,6 +1928,8 @@ int rc_register_device(struct rc_dev *dev) goto out_raw; } + dev->registered = true; + rc = device_add(&dev->dev); if (rc) goto out_rx_free; @@ -1937,8 +1939,6 @@ int rc_register_device(struct rc_dev *dev) dev->device_name ?: "Unspecified device", path ?: "N/A"); kfree(path); - dev->registered = true; - /* * once the the input device is registered in rc_setup_rx_device, * userspace can open the input device and rc_open() will be called diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c index 8cc28c92d05d66..96ae0294ac102a 100644 --- a/drivers/media/rc/serial_ir.c +++ b/drivers/media/rc/serial_ir.c @@ -385,7 +385,7 @@ static irqreturn_t serial_ir_irq_handler(int i, void *blah) } while (!(sinp(UART_IIR) & UART_IIR_NO_INT)); /* still pending ? 
*/ mod_timer(&serial_ir.timeout_timer, - jiffies + nsecs_to_jiffies(serial_ir.rcdev->timeout)); + jiffies + usecs_to_jiffies(serial_ir.rcdev->timeout)); ir_raw_event_handle(serial_ir.rcdev); diff --git a/drivers/media/rc/sunxi-cir.c b/drivers/media/rc/sunxi-cir.c index ddee6ee37bab11..4afc5895bee74e 100644 --- a/drivers/media/rc/sunxi-cir.c +++ b/drivers/media/rc/sunxi-cir.c @@ -137,6 +137,8 @@ static irqreturn_t sunxi_ir_irq(int irqno, void *dev_id) } else if (status & REG_RXSTA_RPE) { ir_raw_event_set_idle(ir->rc, true); ir_raw_event_handle(ir->rc); + } else { + ir_raw_event_handle(ir->rc); } spin_unlock(&ir->ir_lock); diff --git a/drivers/media/usb/dvb-usb/gp8psk.c b/drivers/media/usb/dvb-usb/gp8psk.c index c07f46f5176ea7..b4f661bb564816 100644 --- a/drivers/media/usb/dvb-usb/gp8psk.c +++ b/drivers/media/usb/dvb-usb/gp8psk.c @@ -182,7 +182,7 @@ static int gp8psk_load_bcm4500fw(struct dvb_usb_device *d) static int gp8psk_power_ctrl(struct dvb_usb_device *d, int onoff) { - u8 status, buf; + u8 status = 0, buf; int gp_product_id = le16_to_cpu(d->udev->descriptor.idProduct); if (onoff) { diff --git a/drivers/media/usb/gspca/gspca.c b/drivers/media/usb/gspca/gspca.c index c295f642d352cd..158c8e28ed2cc6 100644 --- a/drivers/media/usb/gspca/gspca.c +++ b/drivers/media/usb/gspca/gspca.c @@ -1575,6 +1575,7 @@ int gspca_dev_probe2(struct usb_interface *intf, input_unregister_device(gspca_dev->input_dev); #endif v4l2_ctrl_handler_free(gspca_dev->vdev.ctrl_handler); + v4l2_device_unregister(&gspca_dev->v4l2_dev); kfree(gspca_dev->usb_buf); kfree(gspca_dev); return ret; diff --git a/drivers/media/usb/msi2500/msi2500.c b/drivers/media/usb/msi2500/msi2500.c index 65be6f140fe836..1c60dfb647e5c8 100644 --- a/drivers/media/usb/msi2500/msi2500.c +++ b/drivers/media/usb/msi2500/msi2500.c @@ -1230,7 +1230,7 @@ static int msi2500_probe(struct usb_interface *intf, } dev->master = master; - master->bus_num = 0; + master->bus_num = -1; master->num_chipselect = 1; 
master->transfer_one_message = msi2500_transfer_one_message; spi_master_set_devdata(master, dev); diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c index bfba06ea60e9d1..2df736c029d6e6 100644 --- a/drivers/media/usb/tm6000/tm6000-video.c +++ b/drivers/media/usb/tm6000/tm6000-video.c @@ -461,11 +461,12 @@ static int tm6000_alloc_urb_buffers(struct tm6000_core *dev) if (dev->urb_buffer) return 0; - dev->urb_buffer = kmalloc_array(num_bufs, sizeof(void *), GFP_KERNEL); + dev->urb_buffer = kmalloc_array(num_bufs, sizeof(*dev->urb_buffer), + GFP_KERNEL); if (!dev->urb_buffer) return -ENOMEM; - dev->urb_dma = kmalloc_array(num_bufs, sizeof(dma_addr_t *), + dev->urb_dma = kmalloc_array(num_bufs, sizeof(*dev->urb_dma), GFP_KERNEL); if (!dev->urb_dma) return -ENOMEM; diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c index d7bbe33840cb46..dfc53d11053fc6 100644 --- a/drivers/media/v4l2-core/v4l2-fwnode.c +++ b/drivers/media/v4l2-core/v4l2-fwnode.c @@ -93,7 +93,7 @@ v4l2_fwnode_bus_type_to_mbus(enum v4l2_fwnode_bus_type type) const struct v4l2_fwnode_bus_conv *conv = get_v4l2_fwnode_bus_conv_by_fwnode_bus(type); - return conv ? conv->mbus_type : V4L2_MBUS_UNKNOWN; + return conv ? 
conv->mbus_type : V4L2_MBUS_INVALID; } static const char * @@ -436,6 +436,10 @@ static int __v4l2_fwnode_endpoint_parse(struct fwnode_handle *fwnode, v4l2_fwnode_mbus_type_to_string(vep->bus_type), vep->bus_type); mbus_type = v4l2_fwnode_bus_type_to_mbus(bus_type); + if (mbus_type == V4L2_MBUS_INVALID) { + pr_debug("unsupported bus type %u\n", bus_type); + return -EINVAL; + } if (vep->bus_type != V4L2_MBUS_UNKNOWN) { if (mbus_type != V4L2_MBUS_UNKNOWN && diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig index 00e013b14703ed..cc2c83e1accfb3 100644 --- a/drivers/memory/Kconfig +++ b/drivers/memory/Kconfig @@ -128,7 +128,7 @@ config OMAP_GPMC_DEBUG config TI_EMIF_SRAM tristate "Texas Instruments EMIF SRAM driver" - depends on SOC_AM33XX || SOC_AM43XX || (ARM && COMPILE_TEST) + depends on SOC_AM33XX || SOC_AM43XX || (ARM && CPU_V7 && COMPILE_TEST) depends on SRAM help This driver is for the EMIF module available on Texas Instruments diff --git a/drivers/memory/jz4780-nemc.c b/drivers/memory/jz4780-nemc.c index 3ec5cb0fce1ee1..555f7ac3b7dd9f 100644 --- a/drivers/memory/jz4780-nemc.c +++ b/drivers/memory/jz4780-nemc.c @@ -291,6 +291,8 @@ static int jz4780_nemc_probe(struct platform_device *pdev) nemc->dev = dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; /* * The driver currently only uses the registers up to offset @@ -304,9 +306,9 @@ static int jz4780_nemc_probe(struct platform_device *pdev) } nemc->base = devm_ioremap(dev, res->start, NEMC_REG_LEN); - if (IS_ERR(nemc->base)) { + if (!nemc->base) { dev_err(dev, "failed to get I/O memory\n"); - return PTR_ERR(nemc->base); + return -ENOMEM; } writel(0, nemc->base + NEMC_NFCSR); diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c index f2a33a1af83612..da0fdb4c759593 100644 --- a/drivers/memory/renesas-rpc-if.c +++ b/drivers/memory/renesas-rpc-if.c @@ -212,7 +212,7 @@ EXPORT_SYMBOL(rpcif_enable_rpm); void rpcif_disable_rpm(struct rpcif *rpc) { - 
pm_runtime_put_sync(rpc->dev); + pm_runtime_disable(rpc->dev); } EXPORT_SYMBOL(rpcif_disable_rpm); @@ -508,7 +508,8 @@ int rpcif_manual_xfer(struct rpcif *rpc) return ret; err_out: - ret = reset_control_reset(rpc->rstc); + if (reset_control_reset(rpc->rstc)) + dev_err(rpc->dev, "Failed to reset HW\n"); rpcif_hw_init(rpc, rpc->bus_size == 2); goto exit; } @@ -560,9 +561,11 @@ static int rpcif_probe(struct platform_device *pdev) } else if (of_device_is_compatible(flash, "cfi-flash")) { name = "rpc-if-hyperflash"; } else { + of_node_put(flash); dev_warn(&pdev->dev, "unknown flash type\n"); return -ENODEV; } + of_node_put(flash); vdev = platform_device_alloc(name, pdev->id); if (!vdev) diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index ef03d6fafc5ce8..12bc3f5a6cbbd5 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -468,7 +468,6 @@ static void memstick_check(struct work_struct *work) host->card = card; if (device_register(&card->dev)) { put_device(&card->dev); - kfree(host->card); host->card = NULL; } } else diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index dd3a1f3dcc1919..d2ef46337191ca 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -759,8 +759,10 @@ static int r592_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto error3; dev->mmio = pci_ioremap_bar(pdev, 0); - if (!dev->mmio) + if (!dev->mmio) { + error = -ENOMEM; goto error4; + } dev->irq = pdev->irq; spin_lock_init(&dev->irq_lock); @@ -786,12 +788,14 @@ static int r592_probe(struct pci_dev *pdev, const struct pci_device_id *id) &dev->dummy_dma_page_physical_address, GFP_KERNEL); r592_stop_dma(dev , 0); - if (request_irq(dev->irq, &r592_irq, IRQF_SHARED, - DRV_NAME, dev)) + error = request_irq(dev->irq, &r592_irq, IRQF_SHARED, + DRV_NAME, dev); + if (error) goto error6; r592_update_card_detect(dev); - if (memstick_add_host(host)) + error = memstick_add_host(host); + 
if (error) goto error7; message("driver successfully loaded"); diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 8b99a13669bfc5..4789507f325b82 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1189,6 +1189,7 @@ config MFD_SIMPLE_MFD_I2C config MFD_SL28CPLD tristate "Kontron sl28cpld Board Management Controller" depends on I2C + depends on ARCH_LAYERSCAPE || COMPILE_TEST select MFD_SIMPLE_MFD_I2C help Say yes here to enable support for the Kontron sl28cpld board diff --git a/drivers/mfd/htc-i2cpld.c b/drivers/mfd/htc-i2cpld.c index 247f9849e54ae8..417b0355d904d4 100644 --- a/drivers/mfd/htc-i2cpld.c +++ b/drivers/mfd/htc-i2cpld.c @@ -346,6 +346,7 @@ static int htcpld_register_chip_i2c( if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) { dev_warn(dev, "i2c adapter %d non-functional\n", pdata->i2c_adapter_id); + i2c_put_adapter(adapter); return -EINVAL; } @@ -360,6 +361,7 @@ static int htcpld_register_chip_i2c( /* I2C device registration failed, contineu with the next */ dev_warn(dev, "Unable to add I2C device for 0x%x\n", plat_chip_data->addr); + i2c_put_adapter(adapter); return PTR_ERR(client); } diff --git a/drivers/mfd/motorola-cpcap.c b/drivers/mfd/motorola-cpcap.c index 2283d88adcc254..30d82bfe5b02fa 100644 --- a/drivers/mfd/motorola-cpcap.c +++ b/drivers/mfd/motorola-cpcap.c @@ -97,7 +97,7 @@ static struct regmap_irq_chip cpcap_irq_chip[CPCAP_NR_IRQ_CHIPS] = { .ack_base = CPCAP_REG_MI1, .mask_base = CPCAP_REG_MIM1, .use_ack = true, - .ack_invert = true, + .clear_ack = true, }, { .name = "cpcap-m2", @@ -106,7 +106,7 @@ static struct regmap_irq_chip cpcap_irq_chip[CPCAP_NR_IRQ_CHIPS] = { .ack_base = CPCAP_REG_MI2, .mask_base = CPCAP_REG_MIM2, .use_ack = true, - .ack_invert = true, + .clear_ack = true, }, { .name = "cpcap1-4", @@ -115,7 +115,7 @@ static struct regmap_irq_chip cpcap_irq_chip[CPCAP_NR_IRQ_CHIPS] = { .ack_base = CPCAP_REG_INT1, .mask_base = CPCAP_REG_INTM1, .use_ack = true, - .ack_invert = true, + 
.clear_ack = true, }, }; diff --git a/drivers/mfd/stmfx.c b/drivers/mfd/stmfx.c index 5e680bfdf5c90e..988e2ba6dd0f33 100644 --- a/drivers/mfd/stmfx.c +++ b/drivers/mfd/stmfx.c @@ -329,11 +329,11 @@ static int stmfx_chip_init(struct i2c_client *client) stmfx->vdd = devm_regulator_get_optional(&client->dev, "vdd"); ret = PTR_ERR_OR_ZERO(stmfx->vdd); - if (ret == -ENODEV) { - stmfx->vdd = NULL; - } else { - return dev_err_probe(&client->dev, ret, - "Failed to get VDD regulator\n"); + if (ret) { + if (ret == -ENODEV) + stmfx->vdd = NULL; + else + return dev_err_probe(&client->dev, ret, "Failed to get VDD regulator\n"); } if (stmfx->vdd) { diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 783bbdcb1e6183..71b3a4d5adc65b 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1027,6 +1027,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, GFP_KERNEL); if (!hdev->kernel_ctx) { rc = -ENOMEM; + hl_mmu_fini(hdev); goto out_err; } @@ -1038,6 +1039,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, "failed to init kernel ctx in hard reset\n"); kfree(hdev->kernel_ctx); hdev->kernel_ctx = NULL; + hl_mmu_fini(hdev); goto out_err; } } @@ -1423,6 +1425,15 @@ void hl_device_fini(struct hl_device *hdev) } } + /* Disable PCI access from device F/W so it won't send us additional + * interrupts. We disable MSI/MSI-X at the halt_engines function and we + * can't have the F/W sending us interrupts after that. We need to + * disable the access here because if the device is marked disable, the + * message won't be send. 
Also, in case of heartbeat, the device CPU is + * marked as disable so this message won't be sent + */ + hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); + /* Mark device as disabled */ hdev->disabled = true; diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index cd41c7ceb0e78c..13c6eebd4fa635 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -385,6 +385,10 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, } counters->rx_throughput = result; + memset(&pkt, 0, sizeof(pkt)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + /* Fetch PCI tx counter */ pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -397,6 +401,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, counters->tx_throughput = result; /* Fetch PCI replay counter */ + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index f9067d3ef43765..3bcef64a677aeb 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -528,6 +528,7 @@ static struct pci_driver hl_pci_driver = { .id_table = ids, .probe = hl_pci_probe, .remove = hl_pci_remove, + .shutdown = hl_pci_remove, .driver.pm = &hl_pm_ops, .err_handler = &hl_pci_err_handler, }; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 07317ea4912956..35401148969f52 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -133,6 +133,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) hw_idle.is_idle = 
hdev->asic_funcs->is_device_idle(hdev, &hw_idle.busy_engines_mask_ext, NULL); + hw_idle.busy_engines_mask = + lower_32_bits(hw_idle.busy_engines_mask_ext); return copy_to_user(out, &hw_idle, min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c index 4327e5704ebb69..607f9a11fba1a4 100644 --- a/drivers/misc/habanalabs/common/pci.c +++ b/drivers/misc/habanalabs/common/pci.c @@ -130,10 +130,8 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) return 0; - if (val & PCI_CONFIG_ELBI_STS_ERR) { - dev_err(hdev->dev, "Error writing to ELBI\n"); + if (val & PCI_CONFIG_ELBI_STS_ERR) return -EIO; - } if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { dev_err(hdev->dev, "ELBI write didn't finish in time\n"); @@ -160,8 +158,12 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data) dbi_offset = addr & 0xFFF; - rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); - rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, + /* Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); + + rc = hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, data); if (rc) @@ -244,9 +246,11 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val); - /* Return the DBI window to the default location */ - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + /* Return the DBI window to the default location + * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); if (rc) 
dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n", @@ -294,9 +298,11 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, /* Enable */ rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000); - /* Return the DBI window to the default location */ - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + /* Return the DBI window to the default location + * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); return rc; } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 7ea6b4368a9133..68f661aca3ff2e 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -754,11 +754,17 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev) size_t fw_size; void *cpu_addr; dma_addr_t dma_handle; - int rc; + int rc, count = 5; +again: rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); + if (rc == -EINTR && count-- > 0) { + msleep(50); + goto again; + } + if (rc) { - dev_err(hdev->dev, "Firmware file %s is not found!\n", + dev_err(hdev->dev, "Failed to load firmware file %s\n", GAUDI_TPC_FW_FILE); goto out; } @@ -2893,7 +2899,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) static void gaudi_pre_hw_init(struct hl_device *hdev) { /* Perform read from the device to make sure device is up */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); /* Set the access through PCI bars (Linux driver only) as * secured @@ -2996,7 +3002,7 @@ static int gaudi_hw_init(struct hl_device *hdev) } /* Perform read from the device to flush all configuration */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); return 0; @@ -3113,7 +3119,8 @@ static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, vma->vm_flags |= VM_IO | 
VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; - rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); + rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, + (dma_addr - HOST_PHYS_BASE), size); if (rc) dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index 3d2b0f0f46507b..283d37b76447e4 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -9,6 +9,7 @@ #include "../include/gaudi/gaudi_coresight.h" #include "../include/gaudi/asic_reg/gaudi_regs.h" #include "../include/gaudi/gaudi_masks.h" +#include "../include/gaudi/gaudi_reg_map.h" #include #include @@ -876,7 +877,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data) } /* Perform read from the device to flush all configuration */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); return rc; } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 235d47b2420f5b..986ed3c072088b 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2675,7 +2675,8 @@ static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; - rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); + rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, + (dma_addr - HOST_PHYS_BASE), size); if (rc) dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 146ca6fb3260f3..d3844730eacafa 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -811,8 +811,10 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, pci_set_master(pdev); - if (!pci_endpoint_test_alloc_irq_vectors(test, irq_type)) + if 
(!pci_endpoint_test_alloc_irq_vectors(test, irq_type)) { + err = -EINVAL; goto err_disable_irq; + } for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { @@ -849,8 +851,10 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, goto err_ida_remove; } - if (!pci_endpoint_test_request_irq(test)) + if (!pci_endpoint_test_request_irq(test)) { + err = -EINVAL; goto err_kfree_test_name; + } misc_device = &test->miscdev; misc_device->minor = MISC_DYNAMIC_MINOR; diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c index 16695366ec926d..26ff49fdf0f7d3 100644 --- a/drivers/misc/vmw_vmci/vmci_context.c +++ b/drivers/misc/vmw_vmci/vmci_context.c @@ -743,7 +743,7 @@ static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context, return VMCI_ERROR_MORE_DATA; } - dbells = kmalloc(data_size, GFP_ATOMIC); + dbells = kzalloc(data_size, GFP_ATOMIC); if (!dbells) return VMCI_ERROR_NO_MEM; diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index de7cb0369c308f..002426e3cf76c9 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -384,8 +384,10 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) "merging was advertised but not possible"); blk_queue_max_segments(mq->queue, mmc_get_max_segments(host)); - if (mmc_card_mmc(card)) + if (mmc_card_mmc(card) && card->ext_csd.data_sector_size) { block_size = card->ext_csd.data_sector_size; + WARN_ON(block_size != 512 && block_size != 4096); + } blk_queue_logical_block_size(mq->queue, block_size); /* diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 29f6180a00363c..316393c694d7a6 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -731,6 +731,7 @@ static int pxamci_probe(struct platform_device *pdev) host->power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW); if (IS_ERR(host->power)) { + ret = PTR_ERR(host->power); dev_err(dev, "Failed requesting 
gpio_power\n"); goto out; } diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index bbf3496f44955f..f9780c65ebe98a 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -314,11 +314,7 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) static void sdhci_brcmstb_shutdown(struct platform_device *pdev) { - int ret; - - ret = sdhci_pltfm_unregister(pdev); - if (ret) - dev_err(&pdev->dev, "failed to shutdown\n"); + sdhci_pltfm_suspend(&pdev->dev); } MODULE_DEVICE_TABLE(of, sdhci_brcm_of_match); diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index 4b673792b5a42e..d90020ed362273 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -16,6 +16,8 @@ #include "sdhci-pltfm.h" +#define SDHCI_DWCMSHC_ARG2_STUFF GENMASK(31, 16) + /* DWCMSHC specific Mode Select value */ #define DWCMSHC_CTRL_HS400 0x7 @@ -49,6 +51,29 @@ static void dwcmshc_adma_write_desc(struct sdhci_host *host, void **desc, sdhci_adma_write_desc(host, desc, addr, len, cmd); } +static void dwcmshc_check_auto_cmd23(struct mmc_host *mmc, + struct mmc_request *mrq) +{ + struct sdhci_host *host = mmc_priv(mmc); + + /* + * No matter V4 is enabled or not, ARGUMENT2 register is 32-bit + * block count register which doesn't support stuff bits of + * CMD23 argument on dwcmsch host controller. 
+ */ + if (mrq->sbc && (mrq->sbc->arg & SDHCI_DWCMSHC_ARG2_STUFF)) + host->flags &= ~SDHCI_AUTO_CMD23; + else + host->flags |= SDHCI_AUTO_CMD23; +} + +static void dwcmshc_request(struct mmc_host *mmc, struct mmc_request *mrq) +{ + dwcmshc_check_auto_cmd23(mmc, mrq); + + sdhci_request(mmc, mrq); +} + static void dwcmshc_set_uhs_signaling(struct sdhci_host *host, unsigned int timing) { @@ -133,6 +158,8 @@ static int dwcmshc_probe(struct platform_device *pdev) sdhci_get_of_property(pdev); + host->mmc_host_ops.request = dwcmshc_request; + err = sdhci_add_host(host); if (err) goto err_clk; diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index ed12aacb1c7364..41d193fa77bbfe 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -1272,7 +1272,7 @@ static void tegra_sdhci_set_timeout(struct sdhci_host *host, * busy wait mode. */ val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_MISC_CTRL); - if (cmd && cmd->busy_timeout >= 11 * HZ) + if (cmd && cmd->busy_timeout >= 11 * MSEC_PER_SEC) val |= SDHCI_MISC_CTRL_ERASE_TIMEOUT_LIMIT; else val &= ~SDHCI_MISC_CTRL_ERASE_TIMEOUT_LIMIT; diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 24c978de2a3f18..0e5234a5ca2247 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -167,7 +167,12 @@ static void xenon_reset_exit(struct sdhci_host *host, /* Disable tuning request and auto-retuning again */ xenon_retune_setup(host); - xenon_set_acg(host, true); + /* + * The ACG should be turned off at the early init time, in order + * to solve a possible issues with the 1.8V regulator stabilization. + * The feature is enabled in later stage. 
+ */ + xenon_set_acg(host, false); xenon_set_sdclk_off_idle(host, sdhc_id, false); diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index e9e163ae9d863c..b07cbb0661fb18 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -993,6 +993,8 @@ int __get_mtd_device(struct mtd_info *mtd) } } + master->usecount++; + while (mtd->parent) { mtd->usecount++; mtd = mtd->parent; @@ -1059,6 +1061,8 @@ void __put_mtd_device(struct mtd_info *mtd) mtd = mtd->parent; } + master->usecount--; + if (master->_put_device) master->_put_device(master); diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index dc8104e6750621..31a6210eb5d44b 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -149,8 +149,10 @@ static int gpmi_init(struct gpmi_nand_data *this) int ret; ret = pm_runtime_get_sync(this->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(this->dev); return ret; + } ret = gpmi_reset_block(r->gpmi_regs, false); if (ret) @@ -1611,7 +1613,7 @@ static int gpmi_ecc_read_page_raw(struct nand_chip *chip, uint8_t *buf, /* Extract interleaved payload data and ECC bits */ for (step = 0; step < nfc_geo->ecc_chunk_count; step++) { if (buf) - nand_extract_bits(buf, step * eccsize, tmp_buf, + nand_extract_bits(buf, step * eccsize * 8, tmp_buf, src_bit_off, eccsize * 8); src_bit_off += eccsize * 8; @@ -2252,7 +2254,7 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip, void *buf_read = NULL; const void *buf_write = NULL; bool direct = false; - struct completion *completion; + struct completion *dma_completion, *bch_completion; unsigned long to; if (check_only) @@ -2263,8 +2265,10 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip, this->transfers[i].direction = DMA_NONE; ret = pm_runtime_get_sync(this->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(this->dev); return ret; + } /* * This driver currently supports only one NAND chip. 
Plus, dies share @@ -2347,22 +2351,24 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip, this->resources.bch_regs + HW_BCH_FLASH0LAYOUT1); } + desc->callback = dma_irq_callback; + desc->callback_param = this; + dma_completion = &this->dma_done; + bch_completion = NULL; + + init_completion(dma_completion); + if (this->bch && buf_read) { writel(BM_BCH_CTRL_COMPLETE_IRQ_EN, this->resources.bch_regs + HW_BCH_CTRL_SET); - completion = &this->bch_done; - } else { - desc->callback = dma_irq_callback; - desc->callback_param = this; - completion = &this->dma_done; + bch_completion = &this->bch_done; + init_completion(bch_completion); } - init_completion(completion); - dmaengine_submit(desc); dma_async_issue_pending(get_dma_chan(this)); - to = wait_for_completion_timeout(completion, msecs_to_jiffies(1000)); + to = wait_for_completion_timeout(dma_completion, msecs_to_jiffies(1000)); if (!to) { dev_err(this->dev, "DMA timeout, last DMA\n"); gpmi_dump_info(this); @@ -2370,6 +2376,16 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip, goto unmap; } + if (this->bch && buf_read) { + to = wait_for_completion_timeout(bch_completion, msecs_to_jiffies(1000)); + if (!to) { + dev_err(this->dev, "BCH timeout, last DMA\n"); + gpmi_dump_info(this); + ret = -ETIMEDOUT; + goto unmap; + } + } + writel(BM_BCH_CTRL_COMPLETE_IRQ_EN, this->resources.bch_regs + HW_BCH_CTRL_CLR); gpmi_clear_bch(this); diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 48e6dac96be6d7..817bddccb775f6 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -510,7 +510,7 @@ static int meson_nfc_dma_buffer_setup(struct nand_chip *nand, void *databuf, } static void meson_nfc_dma_buffer_release(struct nand_chip *nand, - int infolen, int datalen, + int datalen, int infolen, enum dma_data_direction dir) { struct meson_nfc *nfc = nand_get_controller_data(nand); @@ -1044,9 +1044,12 @@ static int meson_nfc_clk_init(struct meson_nfc *nfc) ret = 
clk_set_rate(nfc->device_clk, 24000000); if (ret) - goto err_phase_rx; + goto err_disable_rx; return 0; + +err_disable_rx: + clk_disable_unprepare(nfc->phase_rx); err_phase_rx: clk_disable_unprepare(nfc->phase_tx); err_phase_tx: diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c index a8048cb8d22057..9a9f1c24d83219 100644 --- a/drivers/mtd/nand/raw/nandsim.c +++ b/drivers/mtd/nand/raw/nandsim.c @@ -2211,6 +2211,9 @@ static int ns_attach_chip(struct nand_chip *chip) { unsigned int eccsteps, eccbytes; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = bch ? NAND_ECC_ALGO_BCH : NAND_ECC_ALGO_HAMMING; + if (!bch) return 0; @@ -2234,8 +2237,6 @@ static int ns_attach_chip(struct nand_chip *chip) return -EINVAL; } - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_BCH; chip->ecc.size = 512; chip->ecc.strength = bch; chip->ecc.bytes = eccbytes; @@ -2274,8 +2275,6 @@ static int __init ns_init_module(void) nsmtd = nand_to_mtd(chip); nand_set_controller_data(chip, (void *)ns); - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; /* The NAND_SKIP_BBTSCAN option is necessary for 'overridesize' */ /* and 'badblocks' parameters to work */ chip->options |= NAND_SKIP_BBTSCAN; diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 777fb0de068012..dfc17a28a06b97 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -1570,6 +1570,8 @@ static int check_flash_errors(struct qcom_nand_host *host, int cw_cnt) struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip); int i; + nandc_read_buffer_sync(nandc, true); + for (i = 0; i < cw_cnt; i++) { u32 flash = le32_to_cpu(nandc->reg_read_buf[i]); diff --git a/drivers/mtd/parsers/cmdlinepart.c b/drivers/mtd/parsers/cmdlinepart.c index a79e4d866b08a4..0ddff1a4b51fbf 100644 --- a/drivers/mtd/parsers/cmdlinepart.c +++ 
b/drivers/mtd/parsers/cmdlinepart.c @@ -226,7 +226,7 @@ static int mtdpart_setup_real(char *s) struct cmdline_mtd_partition *this_mtd; struct mtd_partition *parts; int mtd_id_len, num_parts; - char *p, *mtd_id, *semicol; + char *p, *mtd_id, *semicol, *open_parenth; /* * Replace the first ';' by a NULL char so strrchr can work @@ -236,6 +236,14 @@ static int mtdpart_setup_real(char *s) if (semicol) *semicol = '\0'; + /* + * make sure that part-names with ":" will not be handled as + * part of the mtd-id with an ":" + */ + open_parenth = strchr(s, '('); + if (open_parenth) + *open_parenth = '\0'; + mtd_id = s; /* @@ -245,6 +253,10 @@ static int mtdpart_setup_real(char *s) */ p = strrchr(s, ':'); + /* Restore the '(' now. */ + if (open_parenth) + *open_parenth = '('; + /* Restore the ';' now. */ if (semicol) *semicol = ';'; diff --git a/drivers/mtd/spi-nor/atmel.c b/drivers/mtd/spi-nor/atmel.c index 3f5f21a473a69d..deacf87a68a067 100644 --- a/drivers/mtd/spi-nor/atmel.c +++ b/drivers/mtd/spi-nor/atmel.c @@ -8,39 +8,78 @@ #include "core.h" +/* + * The Atmel AT25FS010/AT25FS040 parts have some weird configuration for the + * block protection bits. We don't support them. But legacy behavior in linux + * is to unlock the whole flash array on startup. Therefore, we have to support + * exactly this operation. 
+ */ +static int atmel_at25fs_lock(struct spi_nor *nor, loff_t ofs, uint64_t len) +{ + return -EOPNOTSUPP; +} + +static int atmel_at25fs_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len) +{ + int ret; + + /* We only support unlocking the whole flash array */ + if (ofs || len != nor->params->size) + return -EINVAL; + + /* Write 0x00 to the status register to disable write protection */ + ret = spi_nor_write_sr_and_check(nor, 0); + if (ret) + dev_dbg(nor->dev, "unable to clear BP bits, WP# asserted?\n"); + + return ret; +} + +static int atmel_at25fs_is_locked(struct spi_nor *nor, loff_t ofs, uint64_t len) +{ + return -EOPNOTSUPP; +} + +static const struct spi_nor_locking_ops atmel_at25fs_locking_ops = { + .lock = atmel_at25fs_lock, + .unlock = atmel_at25fs_unlock, + .is_locked = atmel_at25fs_is_locked, +}; + +static void atmel_at25fs_default_init(struct spi_nor *nor) +{ + nor->params->locking_ops = &atmel_at25fs_locking_ops; +} + +static const struct spi_nor_fixups atmel_at25fs_fixups = { + .default_init = atmel_at25fs_default_init, +}; + static const struct flash_info atmel_parts[] = { /* Atmel -- some are (confusingly) marketed as "DataFlash" */ - { "at25fs010", INFO(0x1f6601, 0, 32 * 1024, 4, SECT_4K) }, - { "at25fs040", INFO(0x1f6604, 0, 64 * 1024, 8, SECT_4K) }, + { "at25fs010", INFO(0x1f6601, 0, 32 * 1024, 4, SECT_4K | SPI_NOR_HAS_LOCK) + .fixups = &atmel_at25fs_fixups }, + { "at25fs040", INFO(0x1f6604, 0, 64 * 1024, 8, SECT_4K | SPI_NOR_HAS_LOCK) + .fixups = &atmel_at25fs_fixups }, - { "at25df041a", INFO(0x1f4401, 0, 64 * 1024, 8, SECT_4K) }, - { "at25df321", INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) }, - { "at25df321a", INFO(0x1f4701, 0, 64 * 1024, 64, SECT_4K) }, - { "at25df641", INFO(0x1f4800, 0, 64 * 1024, 128, SECT_4K) }, + { "at25df041a", INFO(0x1f4401, 0, 64 * 1024, 8, SECT_4K | SPI_NOR_HAS_LOCK) }, + { "at25df321", INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K | SPI_NOR_HAS_LOCK) }, + { "at25df321a", INFO(0x1f4701, 0, 64 * 1024, 64, SECT_4K | 
SPI_NOR_HAS_LOCK) }, + { "at25df641", INFO(0x1f4800, 0, 64 * 1024, 128, SECT_4K | SPI_NOR_HAS_LOCK) }, { "at25sl321", INFO(0x1f4216, 0, 64 * 1024, 64, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, { "at26f004", INFO(0x1f0400, 0, 64 * 1024, 8, SECT_4K) }, - { "at26df081a", INFO(0x1f4501, 0, 64 * 1024, 16, SECT_4K) }, - { "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) }, - { "at26df321", INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) }, + { "at26df081a", INFO(0x1f4501, 0, 64 * 1024, 16, SECT_4K | SPI_NOR_HAS_LOCK) }, + { "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K | SPI_NOR_HAS_LOCK) }, + { "at26df321", INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K | SPI_NOR_HAS_LOCK) }, { "at45db081d", INFO(0x1f2500, 0, 64 * 1024, 16, SECT_4K) }, }; -static void atmel_default_init(struct spi_nor *nor) -{ - nor->flags |= SNOR_F_HAS_LOCK; -} - -static const struct spi_nor_fixups atmel_fixups = { - .default_init = atmel_default_init, -}; - const struct spi_nor_manufacturer spi_nor_atmel = { .name = "atmel", .parts = atmel_parts, .nparts = ARRAY_SIZE(atmel_parts), - .fixups = &atmel_fixups, }; diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index f0ae7a01703a1d..ad6c79d9a7f86c 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -906,7 +906,7 @@ static int spi_nor_write_16bit_cr_and_check(struct spi_nor *nor, u8 cr) * * Return: 0 on success, -errno otherwise. */ -static int spi_nor_write_sr_and_check(struct spi_nor *nor, u8 sr1) +int spi_nor_write_sr_and_check(struct spi_nor *nor, u8 sr1) { if (nor->flags & SNOR_F_HAS_16BIT_SR) return spi_nor_write_16bit_sr_and_check(nor, sr1); @@ -2915,20 +2915,27 @@ static int spi_nor_quad_enable(struct spi_nor *nor) } /** - * spi_nor_unlock_all() - Unlocks the entire flash memory array. + * spi_nor_try_unlock_all() - Tries to unlock the entire flash memory array. * @nor: pointer to a 'struct spi_nor'. 
* * Some SPI NOR flashes are write protected by default after a power-on reset * cycle, in order to avoid inadvertent writes during power-up. Backward * compatibility imposes to unlock the entire flash memory array at power-up * by default. + * + * Unprotecting the entire flash array will fail for boards which are hardware + * write-protected. Thus any errors are ignored. */ -static int spi_nor_unlock_all(struct spi_nor *nor) +static void spi_nor_try_unlock_all(struct spi_nor *nor) { - if (nor->flags & SNOR_F_HAS_LOCK) - return spi_nor_unlock(&nor->mtd, 0, nor->params->size); + int ret; - return 0; + if (!(nor->flags & SNOR_F_HAS_LOCK)) + return; + + ret = spi_nor_unlock(&nor->mtd, 0, nor->params->size); + if (ret) + dev_dbg(nor->dev, "Failed to unlock the entire flash memory array\n"); } static int spi_nor_init(struct spi_nor *nor) @@ -2941,11 +2948,7 @@ static int spi_nor_init(struct spi_nor *nor) return err; } - err = spi_nor_unlock_all(nor); - if (err) { - dev_dbg(nor->dev, "Failed to unlock the entire flash memory array\n"); - return err; - } + spi_nor_try_unlock_all(nor); if (nor->addr_width == 4 && !(nor->flags & SNOR_F_4B_OPCODES)) { /* diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h index 6f2f6b27173fd5..6f62ee861231ae 100644 --- a/drivers/mtd/spi-nor/core.h +++ b/drivers/mtd/spi-nor/core.h @@ -409,6 +409,7 @@ void spi_nor_unlock_and_unprep(struct spi_nor *nor); int spi_nor_sr1_bit6_quad_enable(struct spi_nor *nor); int spi_nor_sr2_bit1_quad_enable(struct spi_nor *nor); int spi_nor_sr2_bit7_quad_enable(struct spi_nor *nor); +int spi_nor_write_sr_and_check(struct spi_nor *nor, u8 sr1); int spi_nor_xread_sr(struct spi_nor *nor, u8 *sr); ssize_t spi_nor_read_data(struct spi_nor *nor, loff_t from, size_t len, diff --git a/drivers/mtd/spi-nor/sst.c b/drivers/mtd/spi-nor/sst.c index e0af6d25d573b1..0ab07624fb73fd 100644 --- a/drivers/mtd/spi-nor/sst.c +++ b/drivers/mtd/spi-nor/sst.c @@ -18,7 +18,8 @@ static const struct flash_info 
sst_parts[] = { SECT_4K | SST_WRITE) }, { "sst25vf032b", INFO(0xbf254a, 0, 64 * 1024, 64, SECT_4K | SST_WRITE) }, - { "sst25vf064c", INFO(0xbf254b, 0, 64 * 1024, 128, SECT_4K) }, + { "sst25vf064c", INFO(0xbf254b, 0, 64 * 1024, 128, + SECT_4K | SPI_NOR_4BIT_BP) }, { "sst25wf512", INFO(0xbf2501, 0, 64 * 1024, 1, SECT_4K | SST_WRITE) }, { "sst25wf010", INFO(0xbf2502, 0, 64 * 1024, 2, diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index ff0bea1554f9bb..59c1724bcd0ed3 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -380,7 +380,7 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, goto free_dst; min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + - BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr); + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct ipv6hdr); err = skb_cow_head(skb, min_headroom); if (unlikely(err)) @@ -534,6 +534,7 @@ static void bareudp_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &bareudp_type); dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; dev->features |= NETIF_F_RXCSUM; + dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; @@ -644,11 +645,20 @@ static int bareudp_link_config(struct net_device *dev, return 0; } +static void bareudp_dellink(struct net_device *dev, struct list_head *head) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + list_del(&bareudp->next); + unregister_netdevice_queue(dev, head); +} + static int bareudp_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct bareudp_conf conf; + LIST_HEAD(list_kill); int err; err = bareudp2info(data, &conf, extack); @@ -661,17 +671,14 @@ static int bareudp_newlink(struct net *net, struct net_device *dev, err = bareudp_link_config(dev, tb); if (err) - return err; + goto err_unconfig; return 0; -} - 
-static void bareudp_dellink(struct net_device *dev, struct list_head *head) -{ - struct bareudp_dev *bareudp = netdev_priv(dev); - list_del(&bareudp->next); - unregister_netdevice_queue(dev, head); +err_unconfig: + bareudp_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + return err; } static size_t bareudp_get_size(const struct net_device *dev) diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 424970939fd4c8..1c28eade6becc3 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -123,6 +123,7 @@ config CAN_JANZ_ICAN3 config CAN_KVASER_PCIEFD depends on PCI tristate "Kvaser PCIe FD cards" + select CRC32 help This is a driver for the Kvaser PCI Express CAN FD family. diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 81e39d7507d8fa..24cd3c1027ecca 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -592,11 +592,11 @@ static void can_restart(struct net_device *dev) cf->can_id |= CAN_ERR_RESTARTED; - netif_rx_ni(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx_ni(skb); + restart: netdev_dbg(dev, "restarted\n"); priv->can_stats.restarts++; @@ -1163,7 +1163,7 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); struct can_ctrlmode cm = {.flags = priv->ctrlmode}; - struct can_berr_counter bec; + struct can_berr_counter bec = { }; enum can_state state = priv->state; if (priv->do_get_state) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 61a93b19203799..3c1e379751683f 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -380,10 +380,6 @@ void m_can_config_endisable(struct m_can_classdev *cdev, bool enable) cccr &= ~CCCR_CSR; if (enable) { - /* Clear the Clock stop request if it was set */ - if (cccr & CCCR_CSR) - cccr &= ~CCCR_CSR; - /* enable m_can configuration */ m_can_write(cdev, M_CAN_CCCR, cccr | CCCR_INIT); udelay(5); @@ -1918,8 
+1914,6 @@ EXPORT_SYMBOL_GPL(m_can_class_resume); void m_can_class_unregister(struct m_can_classdev *m_can_dev) { unregister_candev(m_can_dev->net); - - m_can_clk_stop(m_can_dev); } EXPORT_SYMBOL_GPL(m_can_class_unregister); diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c index 7347ab39c5b657..f726c5112294f6 100644 --- a/drivers/net/can/m_can/tcan4x5x.c +++ b/drivers/net/can/m_can/tcan4x5x.c @@ -129,30 +129,6 @@ struct tcan4x5x_priv { int reg_offset; }; -static struct can_bittiming_const tcan4x5x_bittiming_const = { - .name = DEVICE_NAME, - .tseg1_min = 2, - .tseg1_max = 31, - .tseg2_min = 2, - .tseg2_max = 16, - .sjw_max = 16, - .brp_min = 1, - .brp_max = 32, - .brp_inc = 1, -}; - -static struct can_bittiming_const tcan4x5x_data_bittiming_const = { - .name = DEVICE_NAME, - .tseg1_min = 1, - .tseg1_max = 32, - .tseg2_min = 1, - .tseg2_max = 16, - .sjw_max = 16, - .brp_min = 1, - .brp_max = 32, - .brp_inc = 1, -}; - static void tcan4x5x_check_wake(struct tcan4x5x_priv *priv) { int wake_state = 0; @@ -479,8 +455,6 @@ static int tcan4x5x_can_probe(struct spi_device *spi) mcan_class->dev = &spi->dev; mcan_class->ops = &tcan4x5x_ops; mcan_class->is_peripheral = true; - mcan_class->bit_timing = &tcan4x5x_bittiming_const; - mcan_class->data_timing = &tcan4x5x_data_bittiming_const; mcan_class->net->irq = spi->irq; spi_set_drvdata(spi, priv); diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 8a39be076e143e..59de6b3b5f026d 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1432,7 +1432,7 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv, else skb = alloc_can_skb(priv->ndev, (struct can_frame **)&cfd); - if (!cfd) { + if (!skb) { stats->rx_dropped++; return 0; } diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index d29d20525588c6..d565922838186f 100644 --- 
a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -512,11 +512,11 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if, else memcpy(cfd->data, rm->d, cfd->len); - peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(rm->ts_low)); - netdev->stats.rx_packets++; netdev->stats.rx_bytes += cfd->len; + peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(rm->ts_low)); + return 0; } @@ -578,11 +578,11 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if, if (!skb) return -ENOMEM; - peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(sm->ts_low)); - netdev->stats.rx_packets++; netdev->stats.rx_bytes += cf->can_dlc; + peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(sm->ts_low)); + return 0; } diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index d6ba9426be4deb..b1baa4ac1d5378 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -39,6 +39,7 @@ static netdev_tx_t vxcan_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device *peer; struct canfd_frame *cfd = (struct canfd_frame *)skb->data; struct net_device_stats *peerstats, *srcstats = &dev->stats; + u8 len; if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; @@ -61,12 +62,13 @@ static netdev_tx_t vxcan_xmit(struct sk_buff *skb, struct net_device *dev) skb->dev = peer; skb->ip_summed = CHECKSUM_UNNECESSARY; + len = cfd->len; if (netif_rx_ni(skb) == NET_RX_SUCCESS) { srcstats->tx_packets++; - srcstats->tx_bytes += cfd->len; + srcstats->tx_bytes += len; peerstats = &peer->stats; peerstats->rx_packets++; - peerstats->rx_bytes += cfd->len; + peerstats->rx_bytes += len; } out_unlock: diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 288b5a5c3e0dbc..95c7fa171e35ac 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1404,7 +1404,7 @@ int b53_vlan_prepare(struct dsa_switch *ds, int port, !(vlan->flags & 
BRIDGE_VLAN_INFO_UNTAGGED)) return -EINVAL; - if (vlan->vid_end > dev->num_vlans) + if (vlan->vid_end >= dev->num_vlans) return -ERANGE; b53_enable_vlan(dev, true, ds->vlan_filtering); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 1e9a0adda2d692..445226720ff299 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -509,15 +509,19 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) /* Find our integrated MDIO bus node */ dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio"); priv->master_mii_bus = of_mdio_find_bus(dn); - if (!priv->master_mii_bus) + if (!priv->master_mii_bus) { + of_node_put(dn); return -EPROBE_DEFER; + } get_device(&priv->master_mii_bus->dev); priv->master_mii_dn = dn; priv->slave_mii_bus = devm_mdiobus_alloc(ds->dev); - if (!priv->slave_mii_bus) + if (!priv->slave_mii_bus) { + of_node_put(dn); return -ENOMEM; + } priv->slave_mii_bus->priv = priv; priv->slave_mii_bus->name = "sf2 slave mii"; diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 09701c17f3f63e..662e68a0e7e61d 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -92,9 +92,7 @@ GSWIP_MDIO_PHY_FDUP_MASK) /* GSWIP MII Registers */ -#define GSWIP_MII_CFG0 0x00 -#define GSWIP_MII_CFG1 0x02 -#define GSWIP_MII_CFG5 0x04 +#define GSWIP_MII_CFGp(p) (0x2 * (p)) #define GSWIP_MII_CFG_EN BIT(14) #define GSWIP_MII_CFG_LDCLKDIS BIT(12) #define GSWIP_MII_CFG_MODE_MIIP 0x0 @@ -392,17 +390,9 @@ static void gswip_mii_mask(struct gswip_priv *priv, u32 clear, u32 set, static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set, int port) { - switch (port) { - case 0: - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG0); - break; - case 1: - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG1); - break; - case 5: - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG5); - break; - } + /* There's no MII_CFG register for the CPU port */ + if (!dsa_is_cpu_port(priv->ds, port)) + 
gswip_mii_mask(priv, clear, set, GSWIP_MII_CFGp(port)); } static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set, @@ -822,9 +812,8 @@ static int gswip_setup(struct dsa_switch *ds) gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1); /* Disable the xMII link */ - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 0); - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 1); - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 5); + for (i = 0; i < priv->hw_info->max_ports; i++) + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i); /* enable special tag insertion on cpu port */ gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN, @@ -1447,11 +1436,12 @@ static void gswip_phylink_validate(struct dsa_switch *ds, int port, phylink_set(mask, Pause); phylink_set(mask, Asym_Pause); - /* With the exclusion of MII and Reverse MII, we support Gigabit, - * including Half duplex + /* With the exclusion of MII, Reverse MII and Reduced MII, we + * support Gigabit, including Half duplex */ if (state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_REVMII) { + state->interface != PHY_INTERFACE_MODE_REVMII && + state->interface != PHY_INTERFACE_MODE_RMII) { phylink_set(mask, 1000baseT_Full); phylink_set(mask, 1000baseT_Half); } @@ -1541,9 +1531,7 @@ static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port, { struct gswip_priv *priv = ds->priv; - /* Enable the xMII interface only for the external PHY */ - if (interface != PHY_INTERFACE_MODE_INTERNAL) - gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port); + gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port); } static void gswip_get_strings(struct dsa_switch *ds, int port, u32 stringset, diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 0ef854911f215f..d4a64dbde31573 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -400,7 +400,7 @@ int ksz_switch_register(struct ksz_device *dev, 
gpiod_set_value_cansleep(dev->reset_gpio, 1); usleep_range(10000, 12000); gpiod_set_value_cansleep(dev->reset_gpio, 0); - usleep_range(100, 1000); + msleep(100); } mutex_init(&dev->dev_mutex); diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 1048509a849bca..0938caccc62aca 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -351,6 +351,10 @@ int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip, if (err) return err; + err = mv88e6185_g1_stu_data_read(chip, entry); + if (err) + return err; + /* VTU DBNum[3:0] are located in VTU Operation 3:0 * VTU DBNum[5:4] are located in VTU Operation 9:8 */ diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index e24a99031b80f2..4d49c5f2b79056 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -159,6 +159,8 @@ struct ar9331_sw_priv { struct dsa_switch ds; struct dsa_switch_ops ops; struct irq_domain *irqdomain; + u32 irq_mask; + struct mutex lock_irq; struct mii_bus *mbus; /* mdio master */ struct mii_bus *sbus; /* mdio slave */ struct regmap *regmap; @@ -520,32 +522,44 @@ static irqreturn_t ar9331_sw_irq(int irq, void *data) static void ar9331_sw_mask_irq(struct irq_data *d) { struct ar9331_sw_priv *priv = irq_data_get_irq_chip_data(d); - struct regmap *regmap = priv->regmap; - int ret; - ret = regmap_update_bits(regmap, AR9331_SW_REG_GINT_MASK, - AR9331_SW_GINT_PHY_INT, 0); - if (ret) - dev_err(priv->dev, "could not mask IRQ\n"); + priv->irq_mask = 0; } static void ar9331_sw_unmask_irq(struct irq_data *d) +{ + struct ar9331_sw_priv *priv = irq_data_get_irq_chip_data(d); + + priv->irq_mask = AR9331_SW_GINT_PHY_INT; +} + +static void ar9331_sw_irq_bus_lock(struct irq_data *d) +{ + struct ar9331_sw_priv *priv = irq_data_get_irq_chip_data(d); + + mutex_lock(&priv->lock_irq); +} + +static void ar9331_sw_irq_bus_sync_unlock(struct irq_data *d) { struct ar9331_sw_priv *priv = 
irq_data_get_irq_chip_data(d); struct regmap *regmap = priv->regmap; int ret; ret = regmap_update_bits(regmap, AR9331_SW_REG_GINT_MASK, - AR9331_SW_GINT_PHY_INT, - AR9331_SW_GINT_PHY_INT); + AR9331_SW_GINT_PHY_INT, priv->irq_mask); if (ret) - dev_err(priv->dev, "could not unmask IRQ\n"); + dev_err(priv->dev, "failed to change IRQ mask\n"); + + mutex_unlock(&priv->lock_irq); } static struct irq_chip ar9331_sw_irq_chip = { .name = AR9331_SW_NAME, .irq_mask = ar9331_sw_mask_irq, .irq_unmask = ar9331_sw_unmask_irq, + .irq_bus_lock = ar9331_sw_irq_bus_lock, + .irq_bus_sync_unlock = ar9331_sw_irq_bus_sync_unlock, }; static int ar9331_sw_irq_map(struct irq_domain *domain, unsigned int irq, @@ -584,6 +598,7 @@ static int ar9331_sw_irq_init(struct ar9331_sw_priv *priv) return irq ? irq : -EINVAL; } + mutex_init(&priv->lock_irq); ret = devm_request_threaded_irq(dev, irq, NULL, ar9331_sw_irq, IRQF_ONESHOT, AR9331_SW_NAME, priv); if (ret) { diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 862ea44beea772..5ed80d9a6b9fe1 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -828,13 +828,13 @@ static int emac_probe(struct platform_device *pdev) db->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(db->clk)) { ret = PTR_ERR(db->clk); - goto out_iounmap; + goto out_dispose_mapping; } ret = clk_prepare_enable(db->clk); if (ret) { dev_err(&pdev->dev, "Error couldn't enable clock (%d)\n", ret); - goto out_iounmap; + goto out_dispose_mapping; } ret = sunxi_sram_claim(&pdev->dev); @@ -893,6 +893,8 @@ static int emac_probe(struct platform_device *pdev) sunxi_sram_release(&pdev->dev); out_clk_disable_unprepare: clk_disable_unprepare(db->clk); +out_dispose_mapping: + irq_dispose_mapping(ndev->irq); out_iounmap: iounmap(db->membase); out: @@ -911,6 +913,7 @@ static int emac_remove(struct platform_device *pdev) unregister_netdev(ndev); sunxi_sram_release(&pdev->dev); 
clk_disable_unprepare(db->clk); + irq_dispose_mapping(ndev->irq); iounmap(db->membase); free_netdev(ndev); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 0fdd19d99d99fd..0404aafd5ce56d 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2503,8 +2503,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) priv = netdev_priv(dev); priv->clk = devm_clk_get_optional(&pdev->dev, "sw_sysport"); - if (IS_ERR(priv->clk)) - return PTR_ERR(priv->clk); + if (IS_ERR(priv->clk)) { + ret = PTR_ERR(priv->clk); + goto err_free_netdev; + } /* Allocate number of TX rings */ priv->tx_rings = devm_kcalloc(&pdev->dev, txq, @@ -2577,6 +2579,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) NETIF_F_HW_VLAN_CTAG_TX; dev->hw_features |= dev->features; dev->vlan_features |= dev->features; + dev->max_mtu = UMAC_MAX_MTU_SIZE; /* Request the WOL interrupt and advertise suspend if available */ priv->wol_irq_disabled = 1; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0af0af2b70fe43..033bfab24ef2f7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6790,8 +6790,10 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) ctx->tqm_fp_rings_count = resp->tqm_fp_rings_count; if (!ctx->tqm_fp_rings_count) ctx->tqm_fp_rings_count = bp->max_q; + else if (ctx->tqm_fp_rings_count > BNXT_MAX_TQM_FP_RINGS) + ctx->tqm_fp_rings_count = BNXT_MAX_TQM_FP_RINGS; - tqm_rings = ctx->tqm_fp_rings_count + 1; + tqm_rings = ctx->tqm_fp_rings_count + BNXT_MAX_TQM_SP_RINGS; ctx_pg = kcalloc(tqm_rings, sizeof(*ctx_pg), GFP_KERNEL); if (!ctx_pg) { kfree(ctx); @@ -6925,7 +6927,8 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) pg_attr = &req.tqm_sp_pg_size_tqm_sp_lvl, pg_dir = &req.tqm_sp_page_dir, ena = 
FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP; - i < 9; i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) { + i < BNXT_MAX_TQM_RINGS; + i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) { if (!(enables & ena)) continue; @@ -12887,10 +12890,10 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, */ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) { + pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT; struct net_device *netdev = pci_get_drvdata(pdev); struct bnxt *bp = netdev_priv(netdev); int err = 0, off; - pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT; netdev_info(bp->dev, "PCI Slot Reset\n"); @@ -12919,22 +12922,8 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); err = bnxt_hwrm_func_reset(bp); - if (!err) { - err = bnxt_hwrm_func_qcaps(bp); - if (!err && netif_running(netdev)) - err = bnxt_open(netdev); - } - bnxt_ulp_start(bp, err); - if (!err) { - bnxt_reenable_sriov(bp); + if (!err) result = PCI_ERS_RESULT_RECOVERED; - } - } - - if (result != PCI_ERS_RESULT_RECOVERED) { - if (netif_running(netdev)) - dev_close(netdev); - pci_disable_device(pdev); } rtnl_unlock(); @@ -12952,10 +12941,21 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) static void bnxt_io_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); + struct bnxt *bp = netdev_priv(netdev); + int err; + netdev_info(bp->dev, "PCI Slot Resume\n"); rtnl_lock(); - netif_device_attach(netdev); + err = bnxt_hwrm_func_qcaps(bp); + if (!err && netif_running(netdev)) + err = bnxt_open(netdev); + + bnxt_ulp_start(bp, err); + if (!err) { + bnxt_reenable_sriov(bp); + netif_device_attach(netdev); + } rtnl_unlock(); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 47b3c31278798b..e4e926c65118a8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1435,6 +1435,11 @@ struct 
bnxt_ctx_pg_info { struct bnxt_ctx_pg_info **ctx_pg_tbl; }; +#define BNXT_MAX_TQM_SP_RINGS 1 +#define BNXT_MAX_TQM_FP_RINGS 8 +#define BNXT_MAX_TQM_RINGS \ + (BNXT_MAX_TQM_SP_RINGS + BNXT_MAX_TQM_FP_RINGS) + struct bnxt_ctx_mem_info { u32 qp_max_entries; u16 qp_min_qp1_entries; @@ -1473,7 +1478,7 @@ struct bnxt_ctx_mem_info { struct bnxt_ctx_pg_info stat_mem; struct bnxt_ctx_pg_info mrav_mem; struct bnxt_ctx_pg_info tim_mem; - struct bnxt_ctx_pg_info *tqm_mem[9]; + struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TQM_RINGS]; }; struct bnxt_fw_health { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 8c8368c2f335cb..64dbbb04b04346 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -222,8 +222,12 @@ int bnxt_get_ulp_msix_base(struct bnxt *bp) int bnxt_get_ulp_stat_ctxs(struct bnxt *bp) { - if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) - return BNXT_MIN_ROCE_STAT_CTXS; + if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) { + struct bnxt_en_dev *edev = bp->edev; + + if (edev->ulp_tbl[BNXT_ROCE_ULP].msix_requested) + return BNXT_MIN_ROCE_STAT_CTXS; + } return 0; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index be85dad2e3bc4d..fcca023f22e54f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -4069,8 +4069,10 @@ static int bcmgenet_probe(struct platform_device *pdev) clk_disable_unprepare(priv->clk); err = register_netdev(dev); - if (err) + if (err) { + bcmgenet_mii_exit(dev); goto err; + } return err; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index 92473dda55d9f8..22a0220123adeb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -40,6 +40,13 @@ #define TCB_L2T_IX_M 0xfffULL #define TCB_L2T_IX_V(x) ((x) 
<< TCB_L2T_IX_S) +#define TCB_T_FLAGS_W 1 +#define TCB_T_FLAGS_S 0 +#define TCB_T_FLAGS_M 0xffffffffffffffffULL +#define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S) + +#define TCB_FIELD_COOKIE_TFLAG 1 + #define TCB_SMAC_SEL_W 0 #define TCB_SMAC_SEL_S 24 #define TCB_SMAC_SEL_M 0xffULL diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index 2d3dfdd2a7163a..a7c72fd2f024bf 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -573,7 +573,11 @@ int send_tx_flowc_wr(struct sock *sk, int compl, void chtls_tcp_push(struct sock *sk, int flags); int chtls_push_frames(struct chtls_sock *csk, int comp); int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val); +void chtls_set_tcb_field_rpl_skb(struct sock *sk, u16 word, + u64 mask, u64 val, u8 cookie, + int through_l2t); int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 mode, int cipher_type); +void chtls_set_quiesce_ctrl(struct sock *sk, int val); void skb_entail(struct sock *sk, struct sk_buff *skb, int flags); unsigned int keyid_to_addr(int start_addr, int keyid); void free_tls_keyid(struct sock *sk); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 50e3a70e5a290a..5beec901713fb0 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -32,6 +32,7 @@ #include "chtls.h" #include "chtls_cm.h" #include "clip_tbl.h" +#include "t4_tcb.h" /* * State transitions and actions for close. 
Note that if we are in SYN_SENT @@ -267,7 +268,9 @@ static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb) if (sk->sk_state != TCP_SYN_RECV) chtls_send_abort(sk, mode, skb); else - goto out; + chtls_set_tcb_field_rpl_skb(sk, TCB_T_FLAGS_W, + TCB_T_FLAGS_V(TCB_T_FLAGS_M), 0, + TCB_FIELD_COOKIE_TFLAG, 1); return; out: @@ -621,7 +624,7 @@ static void chtls_reset_synq(struct listen_ctx *listen_ctx) while (!skb_queue_empty(&listen_ctx->synq)) { struct chtls_sock *csk = - container_of((struct synq *)__skb_dequeue + container_of((struct synq *)skb_peek (&listen_ctx->synq), struct chtls_sock, synq); struct sock *child = csk->sk; @@ -1109,6 +1112,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, const struct cpl_pass_accept_req *req, struct chtls_dev *cdev) { + struct adapter *adap = pci_get_drvdata(cdev->pdev); struct neighbour *n = NULL; struct inet_sock *newinet; const struct iphdr *iph; @@ -1118,9 +1122,10 @@ static struct sock *chtls_recv_sock(struct sock *lsk, struct dst_entry *dst; struct tcp_sock *tp; struct sock *newsk; + bool found = false; u16 port_id; int rxq_idx; - int step; + int step, i; iph = (const struct iphdr *)network_hdr; newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb); @@ -1152,7 +1157,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, n = dst_neigh_lookup(dst, &ip6h->saddr); #endif } - if (!n) + if (!n || !n->dev) goto free_sk; ndev = n->dev; @@ -1161,6 +1166,13 @@ static struct sock *chtls_recv_sock(struct sock *lsk, if (is_vlan_dev(ndev)) ndev = vlan_dev_real_dev(ndev); + for_each_port(adap, i) + if (cdev->ports[i] == ndev) + found = true; + + if (!found) + goto free_dst; + port_id = cxgb4_port_idx(ndev); csk = chtls_sock_create(cdev); @@ -1237,6 +1249,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, free_csk: chtls_sock_release(&csk->kref); free_dst: + neigh_release(n); dst_release(dst); free_sk: inet_csk_prepare_forced_close(newsk); @@ -1386,7 +1399,7 @@ static void 
chtls_pass_accept_request(struct sock *sk, newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev); if (!newsk) - goto free_oreq; + goto reject; if (chtls_get_module(newsk)) goto reject; @@ -1402,8 +1415,6 @@ static void chtls_pass_accept_request(struct sock *sk, kfree_skb(skb); return; -free_oreq: - chtls_reqsk_free(oreq); reject: mk_tid_release(reply_skb, 0, tid); cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); @@ -1588,6 +1599,11 @@ static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb) sk_wake_async(sk, 0, POLL_OUT); data = lookup_stid(cdev->tids, stid); + if (!data) { + /* listening server close */ + kfree_skb(skb); + goto unlock; + } lsk = ((struct listen_ctx *)data)->lsk; bh_lock_sock(lsk); @@ -1935,6 +1951,8 @@ static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb) else if (tcp_sk(sk)->linger2 < 0 && !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN)) chtls_abort_conn(sk, skb); + else if (csk_flag_nochk(csk, CSK_TX_DATA_SENT)) + chtls_set_quiesce_ctrl(sk, 0); break; default: pr_info("close_con_rpl in bad state %d\n", sk->sk_state); @@ -1996,39 +2014,6 @@ static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev, spin_unlock_bh(&cdev->deferq.lock); } -static void send_abort_rpl(struct sock *sk, struct sk_buff *skb, - struct chtls_dev *cdev, int status, int queue) -{ - struct cpl_abort_req_rss *req = cplhdr(skb); - struct sk_buff *reply_skb; - struct chtls_sock *csk; - - csk = rcu_dereference_sk_user_data(sk); - - reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl), - GFP_KERNEL); - - if (!reply_skb) { - req->status = (queue << 1); - t4_defer_reply(skb, cdev, send_defer_abort_rpl); - return; - } - - set_abort_rpl_wr(reply_skb, GET_TID(req), status); - kfree_skb(skb); - - set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue); - if (csk_conn_inline(csk)) { - struct l2t_entry *e = csk->l2t_entry; - - if (e && sk->sk_state != TCP_SYN_RECV) { - cxgb4_l2t_send(csk->egress_dev, reply_skb, e); - return; - } - } - 
cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); -} - static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb, struct chtls_dev *cdev, int status, int queue) @@ -2077,9 +2062,9 @@ static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb) queue = csk->txq_idx; skb->sk = NULL; + chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, + CPL_ABORT_NO_RST, queue); do_abort_syn_rcv(child, lsk); - send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, - CPL_ABORT_NO_RST, queue); } static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) @@ -2109,8 +2094,8 @@ static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) if (!sock_owned_by_user(psk)) { int queue = csk->txq_idx; + chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); do_abort_syn_rcv(sk, psk); - send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); } else { skb->sk = sk; BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv; @@ -2128,9 +2113,6 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) int queue = csk->txq_idx; if (is_neg_adv(req->status)) { - if (sk->sk_state == TCP_SYN_RECV) - chtls_set_tcb_tflag(sk, 0, 0); - kfree_skb(skb); return; } @@ -2157,12 +2139,12 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb)) return; - chtls_release_resources(sk); - chtls_conn_done(sk); } chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev, rst_status, queue); + chtls_release_resources(sk); + chtls_conn_done(sk); } static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb) @@ -2314,6 +2296,28 @@ static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb) return 0; } +static int chtls_set_tcb_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb) + RSS_HDR; + unsigned int hwtid = GET_TID(rpl); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + + /* return EINVAL if socket doesn't exist */ + if (!sk) + return 
-EINVAL; + + /* Reusing the skb as size of cpl_set_tcb_field structure + * is greater than cpl_abort_req + */ + if (TCB_COOKIE_G(rpl->cookie) == TCB_FIELD_COOKIE_TFLAG) + chtls_send_abort(sk, CPL_ABORT_SEND_RST, NULL); + + kfree_skb(skb); + return 0; +} + chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = { [CPL_PASS_OPEN_RPL] = chtls_pass_open_rpl, [CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl, @@ -2326,5 +2330,6 @@ chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = { [CPL_CLOSE_CON_RPL] = chtls_conn_cpl, [CPL_ABORT_REQ_RSS] = chtls_conn_cpl, [CPL_ABORT_RPL_RSS] = chtls_conn_cpl, - [CPL_FW4_ACK] = chtls_wr_ack, + [CPL_FW4_ACK] = chtls_wr_ack, + [CPL_SET_TCB_RPL] = chtls_set_tcb_rpl, }; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c index a4fb463af22ac3..1e67140b0f8013 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c @@ -88,6 +88,24 @@ static int chtls_set_tcb_field(struct sock *sk, u16 word, u64 mask, u64 val) return ret < 0 ? ret : 0; } +void chtls_set_tcb_field_rpl_skb(struct sock *sk, u16 word, + u64 mask, u64 val, u8 cookie, + int through_l2t) +{ + struct sk_buff *skb; + unsigned int wrlen; + + wrlen = sizeof(struct cpl_set_tcb_field) + sizeof(struct ulptx_idata); + wrlen = roundup(wrlen, 16); + + skb = alloc_skb(wrlen, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return; + + __set_tcb_field(sk, skb, word, mask, val, cookie, 0); + send_or_defer(sk, tcp_sk(sk), skb, through_l2t); +} + /* * Set one of the t_flags bits in the TCB. 
*/ @@ -113,6 +131,29 @@ static int chtls_set_tcb_quiesce(struct sock *sk, int val) TF_RX_QUIESCE_V(val)); } +void chtls_set_quiesce_ctrl(struct sock *sk, int val) +{ + struct chtls_sock *csk; + struct sk_buff *skb; + unsigned int wrlen; + int ret; + + wrlen = sizeof(struct cpl_set_tcb_field) + sizeof(struct ulptx_idata); + wrlen = roundup(wrlen, 16); + + skb = alloc_skb(wrlen, GFP_ATOMIC); + if (!skb) + return; + + csk = rcu_dereference_sk_user_data(sk); + + __set_tcb_field(sk, skb, 1, TF_RX_QUIESCE_V(1), 0, 0, 1); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->port_id); + ret = cxgb4_ofld_send(csk->egress_dev, skb); + if (ret < 0) + kfree_skb(skb); +} + /* TLS Key bitmap processing */ int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi) { diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 0981fe9652e501..3d9b0b161e241c 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1211,7 +1211,7 @@ static int ethoc_probe(struct platform_device *pdev) ret = mdiobus_register(priv->mdio); if (ret) { dev_err(&netdev->dev, "failed to register MDIO bus\n"); - goto free2; + goto free3; } ret = ethoc_mdio_probe(netdev); @@ -1243,6 +1243,7 @@ static int ethoc_probe(struct platform_device *pdev) netif_napi_del(&priv->napi); error: mdiobus_unregister(priv->mdio); +free3: mdiobus_free(priv->mdio); free2: clk_disable_unprepare(priv->clk); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index cf9400a9886d7a..d880ab2a7d9629 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -878,7 +878,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, swa = (struct dpaa2_eth_swa *)sgt_buf; swa->type = DPAA2_ETH_SWA_SINGLE; swa->single.skb = skb; - swa->sg.sgt_size = sgt_buf_size; + swa->single.sgt_size = sgt_buf_size; /* Separately map the SGT buffer */ sgt_addr = 
dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 04f24c66cf3668..55c28fbc5f9eae 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2165,9 +2165,9 @@ static int fec_enet_mii_init(struct platform_device *pdev) fep->mii_bus->parent = &pdev->dev; err = of_mdiobus_register(fep->mii_bus, node); - of_node_put(node); if (err) goto err_out_free_mdiobus; + of_node_put(node); mii_cnt++; @@ -2180,6 +2180,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) err_out_free_mdiobus: mdiobus_free(fep->mii_bus); err_out: + of_node_put(node); return err; } diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c index c8e5d889bd81fb..21de56345503f6 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c @@ -223,3 +223,4 @@ static struct platform_driver fs_enet_bb_mdio_driver = { }; module_platform_driver(fs_enet_bb_mdio_driver); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index 8b51ee142fa3c3..152f4d83765aad 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -224,3 +224,4 @@ static struct platform_driver fs_enet_fec_mdio_driver = { }; module_platform_driver(fs_enet_fec_mdio_driver); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index ba8869c3d891c0..6d853f018d5313 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3889,6 +3889,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work); netif_napi_add(dev, &ugeth->napi, 
ucc_geth_poll, 64); dev->mtu = 1500; + dev->max_mtu = 1518; ugeth->msg_enable = netif_msg_init(debug.msg_enable, UGETH_MSG_DEFAULT); ugeth->phy_interface = phy_interface; @@ -3934,12 +3935,12 @@ static int ucc_geth_remove(struct platform_device* ofdev) struct device_node *np = ofdev->dev.of_node; unregister_netdev(dev); - free_netdev(dev); ucc_geth_memclean(ugeth); if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(ugeth->ug_info->tbi_node); of_node_put(ugeth->ug_info->phy_node); + free_netdev(dev); return 0; } diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 3fe90397219527..c80bed2c995c15 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -575,7 +575,14 @@ struct ucc_geth_tx_global_pram { u32 vtagtable[0x8]; /* 8 4-byte VLAN tags */ u32 tqptr; /* a base pointer to the Tx Queues Memory Region */ - u8 res2[0x80 - 0x74]; + u8 res2[0x78 - 0x74]; + u64 snums_en; + u32 l2l3baseptr; /* top byte consists of a few other bit fields */ + + u16 mtu[8]; + u8 res3[0xa8 - 0x94]; + u32 wrrtablebase; /* top byte is reserved */ + u8 res4[0xc0 - 0xac]; } __packed; /* structure representing Extended Filtering Global Parameters in PRAM */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 7165da0ee9aa50..a6e3f07caf99c3 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -415,6 +415,10 @@ static void __lb_other_process(struct hns_nic_ring_data *ring_data, /* for mutl buffer*/ new_skb = skb_copy(skb, GFP_ATOMIC); dev_kfree_skb_any(skb); + if (!new_skb) { + netdev_err(ndev, "skb alloc failed\n"); + return; + } skb = new_skb; check_ok = 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 1ffe8fac702d94..98a9f5e3fe8646 100644 --- 
a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -168,7 +168,7 @@ struct hclgevf_mbx_arq_ring { #define hclge_mbx_ring_ptr_move_crq(crq) \ (crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num) #define hclge_mbx_tail_ptr_move_arq(arq) \ - (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE) + (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM) #define hclge_mbx_head_ptr_move_arq(arq) \ - (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE) + (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM) #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 1f026408ad38b6..4321132a4f630f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -752,7 +752,8 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset) handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK; handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK; - if (hdev->hw.mac.phydev) { + if (hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv && + hdev->hw.mac.phydev->drv->set_loopback) { count += 1; handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK; } @@ -4484,8 +4485,8 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle, req->ipv4_sctp_en = tuple_sets; break; case SCTP_V6_FLOW: - if ((nfc->data & RXH_L4_B_0_1) || - (nfc->data & RXH_L4_B_2_3)) + if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 && + (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3))) return -EINVAL; req->ipv6_sctp_en = tuple_sets; @@ -4665,6 +4666,8 @@ static void hclge_rss_init_cfg(struct hclge_dev *hdev) vport[i].rss_tuple_sets.ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; vport[i].rss_tuple_sets.ipv6_sctp_en = + hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? 
+ HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : HCLGE_RSS_INPUT_TUPLE_SCTP; vport[i].rss_tuple_sets.ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 64e6afdb61b8de..213ac73f94cdd3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -105,6 +105,8 @@ #define HCLGE_D_IP_BIT BIT(2) #define HCLGE_S_IP_BIT BIT(3) #define HCLGE_V_TAG_BIT BIT(4) +#define HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT \ + (HCLGE_D_IP_BIT | HCLGE_S_IP_BIT | HCLGE_V_TAG_BIT) #define HCLGE_RSS_TC_SIZE_0 1 #define HCLGE_RSS_TC_SIZE_1 2 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index c8e3fdd5999c4f..dc5d150a9c546c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -901,8 +901,8 @@ static int hclgevf_set_rss_tuple(struct hnae3_handle *handle, req->ipv4_sctp_en = tuple_sets; break; case SCTP_V6_FLOW: - if ((nfc->data & RXH_L4_B_0_1) || - (nfc->data & RXH_L4_B_2_3)) + if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 && + (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3))) return -EINVAL; req->ipv6_sctp_en = tuple_sets; @@ -2481,7 +2481,10 @@ static void hclgevf_rss_init_cfg(struct hclgevf_dev *hdev) tuple_sets->ipv4_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; tuple_sets->ipv6_tcp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; tuple_sets->ipv6_udp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; - tuple_sets->ipv6_sctp_en = HCLGEVF_RSS_INPUT_TUPLE_SCTP; + tuple_sets->ipv6_sctp_en = + hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? 
+ HCLGEVF_RSS_INPUT_TUPLE_SCTP_NO_PORT : + HCLGEVF_RSS_INPUT_TUPLE_SCTP; tuple_sets->ipv6_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index c5bcc3894fd54c..526a62f9704669 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -122,6 +122,8 @@ #define HCLGEVF_D_IP_BIT BIT(2) #define HCLGEVF_S_IP_BIT BIT(3) #define HCLGEVF_V_TAG_BIT BIT(4) +#define HCLGEVF_RSS_INPUT_TUPLE_SCTP_NO_PORT \ + (HCLGEVF_D_IP_BIT | HCLGEVF_S_IP_BIT | HCLGEVF_V_TAG_BIT) #define HCLGEVF_STATS_TIMER_INTERVAL 36U diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index da9450f187176a..627ce1a20473ab 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -932,6 +932,7 @@ static void release_resources(struct ibmvnic_adapter *adapter) release_rx_pools(adapter); release_napi(adapter); + release_login_buffer(adapter); release_login_rsp_buffer(adapter); } @@ -2247,8 +2248,7 @@ static void __ibmvnic_reset(struct work_struct *work) set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(60 * HZ); } - } else if (!(rwi->reset_reason == VNIC_RESET_FATAL && - adapter->from_passive_init)) { + } else { rc = do_reset(adapter, rwi, reset_state); } kfree(rwi); @@ -2869,9 +2869,7 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, int rc; if (!scrq) { - netdev_dbg(adapter->netdev, - "Invalid scrq reset. 
irq (%d) or msgs (%p).\n", - scrq->irq, scrq->msgs); + netdev_dbg(adapter->netdev, "Invalid scrq reset.\n"); return -EINVAL; } @@ -3768,7 +3766,9 @@ static int send_login(struct ibmvnic_adapter *adapter) return -1; } + release_login_buffer(adapter); release_login_rsp_buffer(adapter); + client_data_len = vnic_client_data_len(adapter); buffer_size = @@ -4979,6 +4979,12 @@ static void ibmvnic_tasklet(struct tasklet_struct *t) while (!done) { /* Pull all the valid messages off the CRQ */ while ((crq = ibmvnic_next_crq(adapter)) != NULL) { + /* This barrier makes sure ibmvnic_next_crq()'s + * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded + * before ibmvnic_handle_crq()'s + * switch(gen_crq->first) and switch(gen_crq->cmd). + */ + dma_rmb(); ibmvnic_handle_crq(crq, adapter); crq->generic.first = 0; } diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index ba7a0f8f693763..5b2143f4b1f85f 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -436,6 +436,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); #define FLAG2_DFLT_CRC_STRIPPING BIT(12) #define FLAG2_CHECK_RX_HWTSTAMP BIT(13) #define FLAG2_CHECK_SYSTIM_OVERFLOW BIT(14) +#define FLAG2_ENABLE_S0IX_FLOWS BIT(15) #define E1000_RX_DESC_PS(R, i) \ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 03215b0aee4bd3..06442e6bef7310 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -23,6 +23,13 @@ struct e1000_stats { int stat_offset; }; +static const char e1000e_priv_flags_strings[][ETH_GSTRING_LEN] = { +#define E1000E_PRIV_FLAGS_S0IX_ENABLED BIT(0) + "s0ix-enabled", +}; + +#define E1000E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(e1000e_priv_flags_strings) + #define E1000_STAT(str, m) { \ .stat_string = str, \ .type = E1000_STATS, \ @@ 
-1776,6 +1783,8 @@ static int e1000e_get_sset_count(struct net_device __always_unused *netdev, return E1000_TEST_LEN; case ETH_SS_STATS: return E1000_STATS_LEN; + case ETH_SS_PRIV_FLAGS: + return E1000E_PRIV_FLAGS_STR_LEN; default: return -EOPNOTSUPP; } @@ -2097,6 +2106,10 @@ static void e1000_get_strings(struct net_device __always_unused *netdev, p += ETH_GSTRING_LEN; } break; + case ETH_SS_PRIV_FLAGS: + memcpy(data, e1000e_priv_flags_strings, + E1000E_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); + break; } } @@ -2305,6 +2318,37 @@ static int e1000e_get_ts_info(struct net_device *netdev, return 0; } +static u32 e1000e_get_priv_flags(struct net_device *netdev) +{ + struct e1000_adapter *adapter = netdev_priv(netdev); + u32 priv_flags = 0; + + if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) + priv_flags |= E1000E_PRIV_FLAGS_S0IX_ENABLED; + + return priv_flags; +} + +static int e1000e_set_priv_flags(struct net_device *netdev, u32 priv_flags) +{ + struct e1000_adapter *adapter = netdev_priv(netdev); + unsigned int flags2 = adapter->flags2; + + flags2 &= ~FLAG2_ENABLE_S0IX_FLOWS; + if (priv_flags & E1000E_PRIV_FLAGS_S0IX_ENABLED) { + struct e1000_hw *hw = &adapter->hw; + + if (hw->mac.type < e1000_pch_cnp) + return -EINVAL; + flags2 |= FLAG2_ENABLE_S0IX_FLOWS; + } + + if (flags2 != adapter->flags2) + adapter->flags2 = flags2; + + return 0; +} + static const struct ethtool_ops e1000_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = e1000_get_drvinfo, @@ -2336,6 +2380,8 @@ static const struct ethtool_ops e1000_ethtool_ops = { .set_eee = e1000e_set_eee, .get_link_ksettings = e1000_get_link_ksettings, .set_link_ksettings = e1000_set_link_ksettings, + .get_priv_flags = e1000e_get_priv_flags, + .set_priv_flags = e1000e_set_priv_flags, }; void e1000e_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 9aa6fad8ed4772..6fb46682b058a2 100644 --- 
a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1240,6 +1240,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) return 0; if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + struct e1000_adapter *adapter = hw->adapter; + bool firmware_bug = false; + if (force) { /* Request ME un-configure ULP mode in the PHY */ mac_reg = er32(H2ME); @@ -1248,16 +1251,24 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) ew32(H2ME, mac_reg); } - /* Poll up to 300msec for ME to clear ULP_CFG_DONE. */ + /* Poll up to 2.5 seconds for ME to clear ULP_CFG_DONE. + * If this takes more than 1 second, show a warning indicating a + * firmware bug + */ while (er32(FWSM) & E1000_FWSM_ULP_CFG_DONE) { - if (i++ == 30) { + if (i++ == 250) { ret_val = -E1000_ERR_PHY; goto out; } + if (i > 100 && !firmware_bug) + firmware_bug = true; usleep_range(10000, 11000); } - e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); + if (firmware_bug) + e_warn("ULP_CONFIG_DONE took %dmsec. 
This is a firmware bug\n", i * 10); + else + e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); if (force) { mac_reg = er32(H2ME); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 128ab6898070e0..e9b82c209c2df6 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -103,45 +103,6 @@ static const struct e1000_reg_info e1000_reg_info_tbl[] = { {0, NULL} }; -struct e1000e_me_supported { - u16 device_id; /* supported device ID */ -}; - -static const struct e1000e_me_supported me_supported[] = { - {E1000_DEV_ID_PCH_LPT_I217_LM}, - {E1000_DEV_ID_PCH_LPTLP_I218_LM}, - {E1000_DEV_ID_PCH_I218_LM2}, - {E1000_DEV_ID_PCH_I218_LM3}, - {E1000_DEV_ID_PCH_SPT_I219_LM}, - {E1000_DEV_ID_PCH_SPT_I219_LM2}, - {E1000_DEV_ID_PCH_LBG_I219_LM3}, - {E1000_DEV_ID_PCH_SPT_I219_LM4}, - {E1000_DEV_ID_PCH_SPT_I219_LM5}, - {E1000_DEV_ID_PCH_CNP_I219_LM6}, - {E1000_DEV_ID_PCH_CNP_I219_LM7}, - {E1000_DEV_ID_PCH_ICP_I219_LM8}, - {E1000_DEV_ID_PCH_ICP_I219_LM9}, - {E1000_DEV_ID_PCH_CMP_I219_LM10}, - {E1000_DEV_ID_PCH_CMP_I219_LM11}, - {E1000_DEV_ID_PCH_CMP_I219_LM12}, - {E1000_DEV_ID_PCH_TGP_I219_LM13}, - {E1000_DEV_ID_PCH_TGP_I219_LM14}, - {E1000_DEV_ID_PCH_TGP_I219_LM15}, - {0} -}; - -static bool e1000e_check_me(u16 device_id) -{ - struct e1000e_me_supported *id; - - for (id = (struct e1000e_me_supported *)me_supported; - id->device_id; id++) - if (device_id == id->device_id) - return true; - - return false; -} - /** * __ew32_prepare - prepare to write to MAC CSR register on certain parts * @hw: pointer to the HW structure @@ -6962,7 +6923,6 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; int rc; e1000e_flush_lpic(pdev); @@ -6970,13 +6930,13 @@ static __maybe_unused int 
e1000e_pm_suspend(struct device *dev) e1000e_pm_freeze(dev); rc = __e1000_shutdown(pdev, false); - if (rc) + if (rc) { e1000e_pm_thaw(dev); - - /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp && - !e1000e_check_me(hw->adapter->pdev->device)) - e1000e_s0ix_entry_flow(adapter); + } else { + /* Introduce S0ix implementation */ + if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) + e1000e_s0ix_entry_flow(adapter); + } return rc; } @@ -6986,12 +6946,10 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; int rc; /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp && - !e1000e_check_me(hw->adapter->pdev->device)) + if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) e1000e_s0ix_exit_flow(adapter); rc = __e1000_resume(pdev); @@ -7655,6 +7613,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!(adapter->flags & FLAG_HAS_AMT)) e1000e_get_hw_control(adapter); + if (hw->mac.type >= e1000_pch_cnp) + adapter->flags2 |= FLAG2_ENABLE_S0IX_FLOWS; + strlcpy(netdev->name, "eth%d", sizeof(netdev->name)); err = register_netdev(netdev); if (err) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d231a2cdd98ff2..118473dfdcbd24 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -120,6 +120,7 @@ enum i40e_state_t { __I40E_RESET_INTR_RECEIVED, __I40E_REINIT_REQUESTED, __I40E_PF_RESET_REQUESTED, + __I40E_PF_RESET_AND_REBUILD_REQUESTED, __I40E_CORE_RESET_REQUESTED, __I40E_GLOBAL_RESET_REQUESTED, __I40E_EMP_RESET_INTR_RECEIVED, @@ -146,6 +147,8 @@ enum i40e_state_t { }; #define I40E_PF_RESET_FLAG BIT_ULL(__I40E_PF_RESET_REQUESTED) +#define I40E_PF_RESET_AND_REBUILD_FLAG \ + BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED) /* 
VSI state flags */ enum i40e_vsi_state_t { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1337686bd09980..1db482d310c2d5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -36,6 +36,8 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired); +static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, + bool lock_acquired); static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf); @@ -8536,6 +8538,14 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) "FW LLDP is disabled\n" : "FW LLDP is enabled\n"); + } else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) { + /* Request a PF Reset + * + * Resets PF and reinitializes PFs VSI. 
+ */ + i40e_prep_for_reset(pf, lock_acquired); + i40e_reset_and_rebuild(pf, true, lock_acquired); + } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { int v; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 1b5390ec3d78a4..2872c4dc77f070 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1772,7 +1772,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (num_vfs) { if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); + i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG); } ret = i40e_pci_sriov_enable(pdev, num_vfs); goto sriov_configure_out; @@ -1781,7 +1781,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!pci_vfs_assigned(pf->pdev)) { i40e_free_vfs(pf); pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); + i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG); } else { dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); ret = -EINVAL; @@ -4046,20 +4046,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) goto error_param; vf = &pf->vf[vf_id]; - vsi = pf->vsi[vf->lan_vsi_idx]; /* When the VF is resetting wait until it is done. * It can take up to 200 milliseconds, * but wait for up to 300 milliseconds to be safe. - * If the VF is indeed in reset, the vsi pointer has - * to show on the newly loaded vsi under pf->vsi[id]. + * Acquire the VSI pointer only after the VF has been + * properly initialized. 
*/ for (i = 0; i < 15; i++) { - if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { - if (i > 0) - vsi = pf->vsi[vf->lan_vsi_idx]; + if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) break; - } msleep(20); } if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { @@ -4068,6 +4064,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) ret = -EAGAIN; goto error_param; } + vsi = pf->vsi[vf->lan_vsi_idx]; if (is_multicast_ether_addr(mac)) { dev_err(&pf->pdev->dev, diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 567fd67e900efd..8557807b417175 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -219,8 +219,11 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) } while (count); no_buffers: - if (rx_ring->next_to_use != ntu) + if (rx_ring->next_to_use != ntu) { + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.qword1.status_error_len = 0; i40e_release_rx_desc(rx_ring, ntu); + } return ok; } @@ -345,12 +348,12 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) * SBP is *not* set in PRT_SBPVSI (default not set). 
*/ skb = i40e_construct_skb_zc(rx_ring, *bi); - *bi = NULL; if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; break; } + *bi = NULL; cleaned_count++; i40e_inc_ntc(rx_ring); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 95543dfd4fe77c..0a867d64d46753 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1834,11 +1834,9 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) netif_tx_stop_all_queues(netdev); if (CLIENT_ALLOWED(adapter)) { err = iavf_lan_add_device(adapter); - if (err) { - rtnl_unlock(); + if (err) dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n", err); - } } dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr); if (netdev->features & NETIF_F_GRO) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index a0723831c4e487..54cf382fddaf93 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -68,7 +68,9 @@ #define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) #define ICE_AQ_LEN 64 #define ICE_MBXSQ_LEN 64 -#define ICE_MIN_MSIX 2 +#define ICE_MIN_LAN_TXRX_MSIX 1 +#define ICE_MIN_LAN_OICR_MSIX 1 +#define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX) #define ICE_FDIR_MSIX 1 #define ICE_NO_VSI 0xffff #define ICE_VSI_MAP_CONTIG 0 diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 9e8e9531cd8718..69c113a4de7e6a 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3258,8 +3258,8 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, */ static int ice_get_max_txq(struct ice_pf *pf) { - return min_t(int, num_online_cpus(), - pf->hw.func_caps.common_cap.num_txq); + return min3(pf->num_lan_msix, (u16)num_online_cpus(), + (u16)pf->hw.func_caps.common_cap.num_txq); } /** @@ 
-3268,8 +3268,8 @@ static int ice_get_max_txq(struct ice_pf *pf) */ static int ice_get_max_rxq(struct ice_pf *pf) { - return min_t(int, num_online_cpus(), - pf->hw.func_caps.common_cap.num_rxq); + return min3(pf->num_lan_msix, (u16)num_online_cpus(), + (u16)pf->hw.func_caps.common_cap.num_rxq); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 2d27f66ac8534c..192729546bbfc4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1576,7 +1576,13 @@ ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp, sizeof(struct in6_addr)); input->ip.v6.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes; input->ip.v6.tc = fsp->h_u.usr_ip6_spec.tclass; - input->ip.v6.proto = fsp->h_u.usr_ip6_spec.l4_proto; + + /* if no protocol requested, use IPPROTO_NONE */ + if (!fsp->m_u.usr_ip6_spec.l4_proto) + input->ip.v6.proto = IPPROTO_NONE; + else + input->ip.v6.proto = fsp->h_u.usr_ip6_spec.l4_proto; + memcpy(input->mask.v6.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst, sizeof(struct in6_addr)); memcpy(input->mask.v6.src_ip, fsp->m_u.usr_ip6_spec.ip6src, diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 3df67486d42d98..ad9c22a1b97a07 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -161,8 +161,9 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) switch (vsi->type) { case ICE_VSI_PF: - vsi->alloc_txq = min_t(int, ice_get_avail_txq_count(pf), - num_online_cpus()); + vsi->alloc_txq = min3(pf->num_lan_msix, + ice_get_avail_txq_count(pf), + (u16)num_online_cpus()); if (vsi->req_txq) { vsi->alloc_txq = vsi->req_txq; vsi->num_txq = vsi->req_txq; @@ -174,8 +175,9 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { vsi->alloc_rxq = 1; } else { - vsi->alloc_rxq = 
min_t(int, ice_get_avail_rxq_count(pf), - num_online_cpus()); + vsi->alloc_rxq = min3(pf->num_lan_msix, + ice_get_avail_rxq_count(pf), + (u16)num_online_cpus()); if (vsi->req_rxq) { vsi->alloc_rxq = vsi->req_rxq; vsi->num_rxq = vsi->req_rxq; @@ -184,7 +186,9 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) pf->num_lan_rx = vsi->alloc_rxq; - vsi->num_q_vectors = max_t(int, vsi->alloc_rxq, vsi->alloc_txq); + vsi->num_q_vectors = min_t(int, pf->num_lan_msix, + max_t(int, vsi->alloc_rxq, + vsi->alloc_txq)); break; case ICE_VSI_VF: vf = &pf->vf[vsi->vf_id]; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2dea4d0e9415c5..bacb368063e340 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3433,18 +3433,14 @@ static int ice_ena_msix_range(struct ice_pf *pf) if (v_actual < v_budget) { dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", v_budget, v_actual); -/* 2 vectors each for LAN and RDMA (traffic + OICR), one for flow director */ -#define ICE_MIN_LAN_VECS 2 -#define ICE_MIN_RDMA_VECS 2 -#define ICE_MIN_VECS (ICE_MIN_LAN_VECS + ICE_MIN_RDMA_VECS + 1) - if (v_actual < ICE_MIN_LAN_VECS) { + if (v_actual < ICE_MIN_MSIX) { /* error if we can't get minimum vectors */ pci_disable_msix(pf->pdev); err = -ERANGE; goto msix_err; } else { - pf->num_lan_msix = ICE_MIN_LAN_VECS; + pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; } } @@ -4887,9 +4883,15 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) goto err_update_filters; } - /* Add filter for new MAC. If filter exists, just return success */ + /* Add filter for new MAC. If filter exists, return success */ status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI); if (status == ICE_ERR_ALREADY_EXISTS) { + /* Although this MAC filter is already present in hardware it's + * possible in some cases (e.g. 
bonding) that dev_addr was + * modified outside of the driver and needs to be restored back + * to this value. + */ + memcpy(netdev->dev_addr, mac, netdev->addr_len); netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac); return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 23eca2f0a03b1c..af5b7f33db9afb 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1923,12 +1923,15 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) ICE_TX_CTX_EIPT_IPV4_NO_CSUM; l4_proto = ip.v4->protocol; } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) { + int ret; + tunnel |= ICE_TX_CTX_EIPT_IPV6; exthdr = ip.hdr + sizeof(*ip.v6); l4_proto = ip.v6->nexthdr; - if (l4.hdr != exthdr) - ipv6_skip_exthdr(skb, exthdr - skb->data, - &l4_proto, &frag_off); + ret = ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + if (ret < 0) + return -1; } /* define outer transport */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 797886524054cb..98101a8e2952d5 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -446,8 +446,11 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) } } while (--count); - if (rx_ring->next_to_use != ntu) + if (rx_ring->next_to_use != ntu) { + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.status_error0 = 0; ice_release_rx_desc(rx_ring, ntu); + } return ret; } diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 61d331ce38cddc..831f2f09de5fbe 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1675,12 +1675,18 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, cmd->base.phy_address = hw->phy.addr; /* advertising link modes */ - 
ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half); + if (hw->phy.autoneg_advertised & ADVERTISE_10_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_100_HALF) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half); + if (hw->phy.autoneg_advertised & ADVERTISE_100_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_1000_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_2500_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full); /* set autoneg settings */ if (hw->mac.autoneg == 1) { @@ -1792,6 +1798,12 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev, ethtool_convert_link_mode_to_legacy_u32(&advertising, cmd->link_modes.advertising); + /* Converting to legacy u32 drops ETHTOOL_LINK_MODE_2500baseT_Full_BIT. + * We have to check this and convert it to ADVERTISE_2500_FULL + * (aka ETHTOOL_LINK_MODE_2500baseX_Full_BIT) explicitly. 
+ */ + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, 2500baseT_Full)) + advertising |= ADVERTISE_2500_FULL; if (cmd->base.autoneg == AUTONEG_ENABLE) { hw->mac.autoneg = 1; diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index bf48f0ded9c7da..925161959b9ba8 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -219,7 +219,7 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb_any(skb); spin_unlock_irqrestore(&lp->lock, flags); - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } } diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 4a9041ee1b3915..ceb4f27898002d 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4409,7 +4409,7 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog, struct bpf_prog *old_prog; if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { - NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP"); + NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP"); return -EOPNOTSUPP; } @@ -5232,7 +5232,7 @@ static int mvneta_probe(struct platform_device *pdev) err = mvneta_port_power_up(pp, pp->phy_interface); if (err < 0) { dev_err(&pdev->dev, "can't power up port\n"); - return err; + goto err_netdev; } /* Armada3700 network controller does not support per-cpu diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index cea886c5bcb57b..d1f7b51cab6206 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1231,7 +1231,7 @@ static void mvpp22_gop_init_rgmii(struct mvpp2_port *port) regmap_read(priv->sysctrl_base, GENCONF_CTRL0, &val); if (port->gop_id == 2) - val |= GENCONF_CTRL0_PORT0_RGMII | GENCONF_CTRL0_PORT1_RGMII; + val |= GENCONF_CTRL0_PORT0_RGMII; else if (port->gop_id == 3) val |= GENCONF_CTRL0_PORT1_RGMII_MII; 
regmap_write(priv->sysctrl_base, GENCONF_CTRL0, val); @@ -2370,17 +2370,18 @@ static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port, static void mvpp2_tx_pkts_coal_set(struct mvpp2_port *port, struct mvpp2_tx_queue *txq) { - unsigned int thread = mvpp2_cpu_to_thread(port->priv, get_cpu()); + unsigned int thread; u32 val; if (txq->done_pkts_coal > MVPP2_TXQ_THRESH_MASK) txq->done_pkts_coal = MVPP2_TXQ_THRESH_MASK; val = (txq->done_pkts_coal << MVPP2_TXQ_THRESH_OFFSET); - mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id); - mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_THRESH_REG, val); - - put_cpu(); + /* PKT-coalescing registers are per-queue + per-thread */ + for (thread = 0; thread < MVPP2_MAX_THREADS; thread++) { + mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id); + mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_THRESH_REG, val); + } } static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz) @@ -5479,7 +5480,7 @@ static int mvpp2_port_init(struct mvpp2_port *port) struct mvpp2 *priv = port->priv; struct mvpp2_txq_pcpu *txq_pcpu; unsigned int thread; - int queue, err; + int queue, err, val; /* Checks for hardware constraints */ if (port->first_rxq + port->nrxqs > @@ -5493,6 +5494,18 @@ static int mvpp2_port_init(struct mvpp2_port *port) mvpp2_egress_disable(port); mvpp2_port_disable(port); + if (mvpp2_is_xlg(port->phy_interface)) { + val = readl(port->base + MVPP22_XLG_CTRL0_REG); + val &= ~MVPP22_XLG_CTRL0_FORCE_LINK_PASS; + val |= MVPP22_XLG_CTRL0_FORCE_LINK_DOWN; + writel(val, port->base + MVPP22_XLG_CTRL0_REG); + } else { + val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + val &= ~MVPP2_GMAC_FORCE_LINK_PASS; + val |= MVPP2_GMAC_FORCE_LINK_DOWN; + writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + } + port->tx_time_coal = MVPP2_TXDONE_COAL_USEC; port->txqs = devm_kcalloc(dev, port->ntxqs, sizeof(*port->txqs), @@ -5861,8 +5874,6 @@ static void mvpp2_phylink_validate(struct phylink_config *config, 
phylink_set(mask, Autoneg); phylink_set_port_modes(mask); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); switch (state->interface) { case PHY_INTERFACE_MODE_10GBASER: diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index 5692c6087bbb07..a30eb90ba3d28a 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -405,6 +405,38 @@ static int mvpp2_prs_tcam_first_free(struct mvpp2 *priv, unsigned char start, return -EINVAL; } +/* Drop flow control pause frames */ +static void mvpp2_prs_drop_fc(struct mvpp2 *priv) +{ + unsigned char da[ETH_ALEN] = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x01 }; + struct mvpp2_prs_entry pe; + unsigned int len; + + memset(&pe, 0, sizeof(pe)); + + /* For all ports - drop flow control frames */ + pe.index = MVPP2_PE_FC_DROP; + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC); + + /* Set match on DA */ + len = ETH_ALEN; + while (len--) + mvpp2_prs_tcam_data_byte_set(&pe, len, da[len], 0xff); + + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK, + MVPP2_PRS_RI_DROP_MASK); + + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + + /* Mask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC); + mvpp2_prs_hw_write(priv, &pe); +} + /* Enable/disable dropping all mac da's */ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add) { @@ -1162,6 +1194,7 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv) mvpp2_prs_hw_write(priv, &pe); /* Create dummy entries for drop all and promiscuous modes */ + mvpp2_prs_drop_fc(priv); mvpp2_prs_mac_drop_all_set(priv, 0, false); mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false); mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false); @@ -1647,8 +1680,9 @@ static 
int mvpp2_prs_pppoe_init(struct mvpp2 *priv) mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP6); mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP6, MVPP2_PRS_RI_L3_PROTO_MASK); - /* Skip eth_type + 4 bytes of IPv6 header */ - mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 4, + /* Jump to DIP of IPV6 header */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 8 + + MVPP2_MAX_L3_ADDR_SIZE, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); /* Set L3 offset */ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h index e22f6c85d38034..4b68dd37473388 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h @@ -129,7 +129,7 @@ #define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7) #define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 6) #define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 5) -/* reserved */ +#define MVPP2_PE_FC_DROP (MVPP2_PRS_TCAM_SRAM_SIZE - 4) #define MVPP2_PE_MAC_MC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 3) #define MVPP2_PE_MAC_UC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2) #define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 8f17e26dca538c..fc27a40202c6da 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -862,8 +862,10 @@ static int cgx_lmac_init(struct cgx *cgx) if (!lmac) return -ENOMEM; lmac->name = kcalloc(1, sizeof("cgx_fwi_xxx_yyy"), GFP_KERNEL); - if (!lmac->name) - return -ENOMEM; + if (!lmac->name) { + err = -ENOMEM; + goto err_lmac_free; + } sprintf(lmac->name, "cgx_fwi_%d_%d", cgx->cgx_id, i); lmac->lmac_id = i; lmac->cgx = cgx; @@ -874,7 +876,7 @@ static int cgx_lmac_init(struct cgx *cgx) CGX_LMAC_FWI + i * 9), cgx_fwi_event_handler, 0, lmac->name, lmac); if (err) - 
return err; + goto err_irq; /* Enable interrupt */ cgx_write(cgx, lmac->lmac_id, CGXX_CMRX_INT_ENA_W1S, @@ -886,6 +888,12 @@ static int cgx_lmac_init(struct cgx *cgx) } return cgx_lmac_verify_fwi_version(cgx); + +err_irq: + kfree(lmac->name); +err_lmac_free: + kfree(lmac); + return err; } static int cgx_lmac_exit(struct cgx *cgx) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index fa9152ff5e2a08..f4ecc755eaff13 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -454,6 +454,9 @@ int rvu_mbox_handler_cgx_mac_addr_set(struct rvu *rvu, int pf = rvu_get_pf(req->hdr.pcifunc); u8 cgx_id, lmac_id; + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return -EPERM; + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); cgx_lmac_addr_set(cgx_id, lmac_id, req->mac_addr); @@ -470,6 +473,9 @@ int rvu_mbox_handler_cgx_mac_addr_get(struct rvu *rvu, int rc = 0, i; u64 cfg; + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return -EPERM; + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); rsp->hdr.rc = rc; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index d2581090f9a408..df238e46e2aebe 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -473,10 +473,11 @@ dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool) dma_addr_t iova; u8 *buf; - buf = napi_alloc_frag(pool->rbsize); + buf = napi_alloc_frag(pool->rbsize + OTX2_ALIGN); if (unlikely(!buf)) return -ENOMEM; + buf = PTR_ALIGN(buf, OTX2_ALIGN); iova = dma_map_single_attrs(pfvf->dev, buf, pool->rbsize, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); if (unlikely(dma_mapping_error(pfvf->dev, iova))) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 69a05da0e3e3d8..e03e78a35df008 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -275,7 +275,7 @@ int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key err = devlink_fmsg_binary_pair_nest_start(fmsg, "data"); if (err) - return err; + goto free_page; cmd = mlx5_rsc_dump_cmd_create(mdev, key); if (IS_ERR(cmd)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index d29af7b9c695a6..76177f7c5ec292 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -626,6 +626,11 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, if (!reg_c0) return true; + /* If reg_c0 is not equal to the default flow tag then skb->mark + * is not supported and must be reset back to 0. + */ + skb->mark = 0; + priv = netdev_priv(skb->dev); esw = priv->mdev->priv.eswitch; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index e521254d886ef3..6bc6b48a56dc79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -118,16 +118,17 @@ struct mlx5_ct_tuple { u16 zone; }; -struct mlx5_ct_shared_counter { +struct mlx5_ct_counter { struct mlx5_fc *counter; refcount_t refcount; + bool is_shared; }; struct mlx5_ct_entry { struct rhash_head node; struct rhash_head tuple_node; struct rhash_head tuple_nat_node; - struct mlx5_ct_shared_counter *shared_counter; + struct mlx5_ct_counter *counter; unsigned long cookie; unsigned long restore_cookie; struct mlx5_ct_tuple tuple; @@ -166,6 +167,12 @@ static const struct rhashtable_params tuples_nat_ht_params = { .min_size = 16 * 1024, }; +static bool +mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry) +{ + return 
!!(entry->tuple_nat_node.next); +} + static int mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) { @@ -394,13 +401,14 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, } static void -mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) +mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) { - if (!refcount_dec_and_test(&entry->shared_counter->refcount)) + if (entry->counter->is_shared && + !refcount_dec_and_test(&entry->counter->refcount)) return; - mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter); - kfree(entry->shared_counter); + mlx5_fc_destroy(ct_priv->dev, entry->counter->counter); + kfree(entry->counter); } static void @@ -699,7 +707,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->dest_ft = ct_priv->post_ct; attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; attr->outer_match_level = MLX5_MATCH_L4; - attr->counter = entry->shared_counter->counter; + attr->counter = entry->counter->counter; attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); @@ -732,13 +740,34 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, return err; } -static struct mlx5_ct_shared_counter * +static struct mlx5_ct_counter * +mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_ct_counter *counter; + int ret; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + counter->is_shared = false; + counter->counter = mlx5_fc_create(ct_priv->dev, true); + if (IS_ERR(counter->counter)) { + ct_dbg("Failed to create counter for ct entry"); + ret = PTR_ERR(counter->counter); + kfree(counter); + return ERR_PTR(ret); + } + + return counter; +} + +static struct mlx5_ct_counter * mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) { struct mlx5_ct_tuple 
rev_tuple = entry->tuple; - struct mlx5_ct_shared_counter *shared_counter; - struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_ct_counter *shared_counter; struct mlx5_ct_entry *rev_entry; __be16 tmp_port; int ret; @@ -767,25 +796,20 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple, tuples_ht_params); if (rev_entry) { - if (refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) { + if (refcount_inc_not_zero(&rev_entry->counter->refcount)) { mutex_unlock(&ct_priv->shared_counter_lock); - return rev_entry->shared_counter; + return rev_entry->counter; } } mutex_unlock(&ct_priv->shared_counter_lock); - shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL); - if (!shared_counter) - return ERR_PTR(-ENOMEM); - - shared_counter->counter = mlx5_fc_create(dev, true); - if (IS_ERR(shared_counter->counter)) { - ct_dbg("Failed to create counter for ct entry"); - ret = PTR_ERR(shared_counter->counter); - kfree(shared_counter); + shared_counter = mlx5_tc_ct_counter_create(ct_priv); + if (IS_ERR(shared_counter)) { + ret = PTR_ERR(shared_counter); return ERR_PTR(ret); } + shared_counter->is_shared = true; refcount_set(&shared_counter->refcount, 1); return shared_counter; } @@ -798,10 +822,13 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, { int err; - entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); - if (IS_ERR(entry->shared_counter)) { - err = PTR_ERR(entry->shared_counter); - ct_dbg("Failed to create counter for ct entry"); + if (nf_ct_acct_enabled(dev_net(ct_priv->netdev))) + entry->counter = mlx5_tc_ct_counter_create(ct_priv); + else + entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); + + if (IS_ERR(entry->counter)) { + err = PTR_ERR(entry->counter); return err; } @@ -820,7 +847,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, err_nat: mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); err_orig: - 
mlx5_tc_ct_shared_counter_put(ct_priv, entry); + mlx5_tc_ct_counter_put(ct_priv, entry); return err; } @@ -890,13 +917,13 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, err_insert: mlx5_tc_ct_entry_del_rules(ct_priv, entry); err_rules: - rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, - &entry->tuple_nat_node, tuples_nat_ht_params); + if (mlx5_tc_ct_entry_has_nat(entry)) + rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, tuples_nat_ht_params); err_tuple_nat: - if (entry->tuple_node.next) - rhashtable_remove_fast(&ct_priv->ct_tuples_ht, - &entry->tuple_node, - tuples_ht_params); + rhashtable_remove_fast(&ct_priv->ct_tuples_ht, + &entry->tuple_node, + tuples_ht_params); err_tuple: err_set: kfree(entry); @@ -911,14 +938,14 @@ mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, { mlx5_tc_ct_entry_del_rules(ct_priv, entry); mutex_lock(&ct_priv->shared_counter_lock); - if (entry->tuple_node.next) + if (mlx5_tc_ct_entry_has_nat(entry)) rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, &entry->tuple_nat_node, tuples_nat_ht_params); rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, tuples_ht_params); mutex_unlock(&ct_priv->shared_counter_lock); - mlx5_tc_ct_shared_counter_put(ct_priv, entry); + mlx5_tc_ct_counter_put(ct_priv, entry); } @@ -956,7 +983,7 @@ mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, if (!entry) return -ENOENT; - mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse); + mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); flow_stats_update(&f->stats, bytes, packets, 0, lastuse, FLOW_ACTION_HW_STATS_DELAYED); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 07ee1d236ab3e6..2ed17913af80e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -366,6 +366,15 @@ struct mlx5e_swp_spec { u8 
tun_l4_proto; }; +static inline void mlx5e_eseg_swp_offsets_add_vlan(struct mlx5_wqe_eth_seg *eseg) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset += VLAN_HLEN / 2; + eseg->swp_outer_l4_offset += VLAN_HLEN / 2; + eseg->swp_inner_l3_offset += VLAN_HLEN / 2; + eseg->swp_inner_l4_offset += VLAN_HLEN / 2; +} + static inline void mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, struct mlx5e_swp_spec *swp_spec) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 899b98aca0d3ff..1fae7fab8297e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -51,7 +51,7 @@ static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) } static inline void -mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) +mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u16 ihs) { struct mlx5e_swp_spec swp_spec = {}; unsigned int offset = 0; @@ -85,6 +85,8 @@ mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) } mlx5e_set_eseg_swp(skb, eseg, &swp_spec); + if (skb_vlan_tag_present(skb) && ihs) + mlx5e_eseg_swp_offsets_add_vlan(eseg); } #else @@ -163,7 +165,7 @@ static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5_wqe_eth_seg *eseg) + struct mlx5_wqe_eth_seg *eseg, u16 ihs) { #ifdef CONFIG_MLX5_EN_IPSEC if (xfrm_offload(skb)) @@ -172,7 +174,7 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, #if IS_ENABLED(CONFIG_GENEVE) if (skb->encapsulation) - mlx5e_tx_tunnel_accel(skb, eseg); + mlx5e_tx_tunnel_accel(skb, eseg, ihs); #endif return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c index 6c5c54bcd9be08..5cb936541b9e9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -76,7 +76,7 @@ static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw) { - return NUM_IPSEC_SW_COUNTERS; + return priv->ipsec ? NUM_IPSEC_SW_COUNTERS : 0; } static inline MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_sw) {} @@ -105,7 +105,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_hw) { - return (mlx5_fpga_ipsec_device_caps(priv->mdev)) ? NUM_IPSEC_HW_COUNTERS : 0; + return (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) ? NUM_IPSEC_HW_COUNTERS : 0; } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index d20243d6a03260..f23c67575073a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1151,6 +1151,7 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) { struct mlx5e_channels new_channels = {}; bool reset_channels = true; + bool opened; int err = 0; mutex_lock(&priv->state_lock); @@ -1159,22 +1160,24 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_channels.params, trust_state); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (!opened) reset_channels = false; - } /* Skip if tx_min_inline is the same */ if (new_channels.params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode) reset_channels = false; - if (reset_channels) + if (reset_channels) { err = 
mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_update_trust_state_hw, &trust_state); - else + } else { err = mlx5e_update_trust_state_hw(priv, &trust_state); + if (!err && !opened) + priv->channels.params = new_channels.params; + } mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d25a56ec6876a1..e596f050c4316e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -444,12 +444,18 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } - new_channels.params = priv->channels.params; + new_channels.params = *cur_params; new_channels.params.num_channels = count; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + struct mlx5e_params old_params; + + old_params = *cur_params; *cur_params = new_channels.params; err = mlx5e_num_channels_changed(priv); + if (err) + *cur_params = old_params; + goto out; } @@ -1007,6 +1013,22 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); } +static int mlx5e_speed_validate(struct net_device *netdev, bool ext, + const unsigned long link_modes, u8 autoneg) +{ + /* Extended link-mode has no speed limitations. */ + if (ext) + return 0; + + if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && + autoneg != AUTONEG_ENABLE) { + netdev_err(netdev, "%s: 56G link speed requires autoneg enabled\n", + __func__); + return -EINVAL; + } + return 0; +} + static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) { u32 i, ptys_modes = 0; @@ -1100,13 +1122,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, link_modes = autoneg == AUTONEG_ENABLE ? 
ethtool2ptys_adver_func(adver) : mlx5e_port_speed2linkmodes(mdev, speed, !ext); - if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && - autoneg != AUTONEG_ENABLE) { - netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n", - __func__); - err = -EINVAL; + err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg); + if (err) goto out; - } link_modes = link_modes & eproto.cap; if (!link_modes) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 1f48f99c0997d9..7ad332d8625b92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -936,6 +936,7 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, in = kvzalloc(inlen, GFP_KERNEL); if (!in) { kfree(ft->g); + ft->g = NULL; return -ENOMEM; } @@ -1081,6 +1082,7 @@ static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) in = kvzalloc(inlen, GFP_KERNEL); if (!in) { kfree(ft->g); + ft->g = NULL; return -ENOMEM; } @@ -1384,6 +1386,7 @@ static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); kvfree(in); + kfree(ft->g); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ebce97921e03ca..c9b5d7f29911ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3580,7 +3580,14 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, new_channels.params.num_tc = tc ? 
tc : 1; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + struct mlx5e_params old_params; + + old_params = priv->channels.params; priv->channels.params = new_channels.params; + err = mlx5e_num_channels_changed(priv); + if (err) + priv->channels.params = old_params; + goto out; } @@ -3723,7 +3730,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; - struct mlx5e_params *old_params; + struct mlx5e_params *cur_params; int err = 0; bool reset; @@ -3736,8 +3743,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable) goto out; } - old_params = &priv->channels.params; - if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { + cur_params = &priv->channels.params; + if (enable && !MLX5E_GET_PFLAG(cur_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { netdev_warn(netdev, "can't set LRO with legacy RQ\n"); err = -EINVAL; goto out; @@ -3745,18 +3752,23 @@ static int set_feature_lro(struct net_device *netdev, bool enable) reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - new_channels.params = *old_params; + new_channels.params = *cur_params; new_channels.params.lro_en = enable; - if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) == + if (cur_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) == mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL)) reset = false; } if (!reset) { - *old_params = new_channels.params; + struct mlx5e_params old_params; + + old_params = *cur_params; + *cur_params = new_channels.params; err = mlx5e_modify_tirs_lro(priv); + if (err) + *cur_params = old_params; goto out; } @@ -4030,9 +4042,16 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, } if (!reset) { + unsigned int old_mtu = params->sw_mtu; + params->sw_mtu = new_mtu; - if 
(preactivate) - preactivate(priv, NULL); + if (preactivate) { + err = preactivate(priv, NULL); + if (err) { + params->sw_mtu = old_mtu; + goto out; + } + } netdev->mtu = params->sw_mtu; goto out; } @@ -4990,7 +5009,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) FT_CAP(modify_root) && FT_CAP(identified_miss_table_mode) && FT_CAP(flow_table_modify)) { -#ifdef CONFIG_MLX5_ESWITCH +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; #endif #ifdef CONFIG_MLX5_EN_ARFS diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 67247c33b9fd65..304435e5611705 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -738,7 +738,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->features |= NETIF_F_NETNS_LOCAL; +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; +#endif netdev->hw_features |= NETIF_F_SG; netdev->hw_features |= NETIF_F_IP_CSUM; netdev->hw_features |= NETIF_F_IPV6_CSUM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ce710f22b1fffb..4b8a442f09cd63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -67,6 +67,7 @@ #include "lib/geneve.h" #include "lib/fs_chains.h" #include "diag/en_tc_tracepoint.h" +#include #define nic_chains(priv) ((priv)->fs.tc.chains) #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) @@ -1164,6 +1165,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; + if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (flow_flag_test(flow, CT)) { mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; @@ -1194,6 +1198,9 @@ mlx5e_tc_unoffload_fdb_rules(struct 
mlx5_eswitch *esw, { flow_flag_clear(flow, OFFLOADED); + if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) + goto offload_rule_0; + if (flow_flag_test(flow, CT)) { mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); return; @@ -1202,6 +1209,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); +offload_rule_0: mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); } @@ -2271,8 +2279,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | BIT(FLOW_DISSECTOR_KEY_MPLS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); - netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", - dissector->used_keys); + netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", + dissector->used_keys); return -EOPNOTSUPP; } @@ -5009,13 +5017,13 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, return err; } -static int apply_police_params(struct mlx5e_priv *priv, u32 rate, +static int apply_police_params(struct mlx5e_priv *priv, u64 rate, struct netlink_ext_ack *extack) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch *esw; + u32 rate_mbps = 0; u16 vport_num; - u32 rate_mbps; int err; vport_num = rpriv->rep->vport; @@ -5032,7 +5040,11 @@ static int apply_police_params(struct mlx5e_priv *priv, u32 rate, * Moreover, if rate is non zero we choose to configure to a minimum of * 1 mbit/sec. */ - rate_mbps = rate ? 
max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; + if (rate) { + rate = (rate * BITS_PER_BYTE) + 500000; + rate_mbps = max_t(u32, do_div(rate, 1000000), 1); + } + err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); if (err) NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index d97203cf6a007f..38a23d209b33b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -615,9 +615,9 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, - struct mlx5_wqe_eth_seg *eseg) + struct mlx5_wqe_eth_seg *eseg, u16 ihs) { - if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg))) + if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg, ihs))) return false; mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); @@ -647,7 +647,8 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { struct mlx5_wqe_eth_seg eseg = {}; - if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg))) + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, + attr.ihs))) return NETDEV_TX_OK; mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); @@ -664,7 +665,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) /* May update the WQE, but may not post other WQEs. 
*/ mlx5e_accel_tx_finish(sq, wqe, &accel, (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); - if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth))) + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs))) return NETDEV_TX_OK; mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more()); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index d46f8b225ebe33..3e19b1721303f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -95,22 +95,21 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, return 0; } - if (!IS_ERR_OR_NULL(vport->egress.acl)) - return 0; - - vport->egress.acl = esw_acl_table_create(esw, vport->vport, - MLX5_FLOW_NAMESPACE_ESW_EGRESS, - table_size); - if (IS_ERR_OR_NULL(vport->egress.acl)) { - err = PTR_ERR(vport->egress.acl); - vport->egress.acl = NULL; - goto out; + if (!vport->egress.acl) { + vport->egress.acl = esw_acl_table_create(esw, vport->vport, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + table_size); + if (IS_ERR(vport->egress.acl)) { + err = PTR_ERR(vport->egress.acl); + vport->egress.acl = NULL; + goto out; + } + + err = esw_acl_egress_lgcy_groups_create(esw, vport); + if (err) + goto out; } - err = esw_acl_egress_lgcy_groups_create(esw, vport); - if (err) - goto out; - esw_debug(esw->dev, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", vport->vport, vport->info.vlan, vport->info.qos); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index c3faae67e4d6ee..4c74e2690d57bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -173,7 +173,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, 
struct mlx5_vport *vport table_size++; vport->egress.acl = esw_acl_table_create(esw, vport->vport, MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size); - if (IS_ERR_OR_NULL(vport->egress.acl)) { + if (IS_ERR(vport->egress.acl)) { err = PTR_ERR(vport->egress.acl); vport->egress.acl = NULL; return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index b68976b378b81a..d64fad2823e73c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -180,7 +180,7 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, vport->ingress.acl = esw_acl_table_create(esw, vport->vport, MLX5_FLOW_NAMESPACE_ESW_INGRESS, table_size); - if (IS_ERR_OR_NULL(vport->ingress.acl)) { + if (IS_ERR(vport->ingress.acl)) { err = PTR_ERR(vport->ingress.acl); vport->ingress.acl = NULL; return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index 4e55d7225a265c..548c005ea63358 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -258,7 +258,7 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, vport->ingress.acl = esw_acl_table_create(esw, vport->vport, MLX5_FLOW_NAMESPACE_ESW_INGRESS, num_ftes); - if (IS_ERR_OR_NULL(vport->ingress.acl)) { + if (IS_ERR(vport->ingress.acl)) { err = PTR_ERR(vport->ingress.acl); vport->ingress.acl = NULL; return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9fdd99272e3101..634c2bfd25be11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1141,6 +1141,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa destroy_ft: 
root->cmds->destroy_flow_table(root, ft); free_ft: + rhltable_destroy(&ft->fgs_hash); kfree(ft); unlock_root: mutex_unlock(&root->chain_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 33081b24f10aad..9025e5f38bb658 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -556,7 +556,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) struct mlx5_core_dev *tmp_dev; int i, err; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS) return; tmp_dev = mlx5_get_next_phys_dev(dev); @@ -574,12 +576,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0) return; - for (i = 0; i < MLX5_MAX_PORTS; i++) { - tmp_dev = ldev->pf[i].dev; - if (!tmp_dev || !MLX5_CAP_GEN(tmp_dev, lag_master) || - MLX5_CAP_GEN(tmp_dev, num_lag_ports) != MLX5_MAX_PORTS) + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (!ldev->pf[i].dev) break; - } if (i >= MLX5_MAX_PORTS) ldev->flags |= MLX5_LAG_FLAG_READY; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 3a9fa629503f0e..d046db7bb047d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -90,4 +90,9 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, u32 key_type, u32 *p_key_id); void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); +static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) +{ + return devlink_net(priv_to_devlink(dev)); +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8ff207aa147928..e455a2f31f0703 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -50,6 +50,7 @@ #ifdef CONFIG_RFS_ACCEL #include #endif +#include #include #include "mlx5_core.h" #include "lib/eq.h" @@ -233,7 +234,10 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev) strncat(string, ",", remaining_size); remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); - strncat(string, DRIVER_VERSION, remaining_size); + + snprintf(string + strlen(string), remaining_size, "%u.%u.%u", + (u8)((LINUX_VERSION_CODE >> 16) & 0xff), (u8)((LINUX_VERSION_CODE >> 8) & 0xff), + (u16)(LINUX_VERSION_CODE & 0xffff)); /*Send the command*/ MLX5_SET(set_driver_version_in, in, opcode, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 4d7f8a357df76e..a3e0c71831928b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -58,7 +58,7 @@ struct fw_page { struct rb_node rb_node; u64 addr; struct page *page; - u16 func_id; + u32 function; unsigned long bitmask; struct list_head list; unsigned free_count; @@ -74,12 +74,17 @@ enum { MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, }; -static struct rb_root *page_root_per_func_id(struct mlx5_core_dev *dev, u16 func_id) +static u32 get_function(u16 func_id, bool ec_function) +{ + return func_id & (ec_function << 16); +} + +static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) { struct rb_root *root; int err; - root = xa_load(&dev->priv.page_root_xa, func_id); + root = xa_load(&dev->priv.page_root_xa, function); if (root) return root; @@ -87,7 +92,7 @@ static struct rb_root *page_root_per_func_id(struct mlx5_core_dev *dev, u16 func if (!root) return ERR_PTR(-ENOMEM); - err = xa_insert(&dev->priv.page_root_xa, func_id, root, GFP_KERNEL); + err = xa_insert(&dev->priv.page_root_xa, function, root, GFP_KERNEL); if (err) { 
kfree(root); return ERR_PTR(err); @@ -98,7 +103,7 @@ static struct rb_root *page_root_per_func_id(struct mlx5_core_dev *dev, u16 func return root; } -static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id) +static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u32 function) { struct rb_node *parent = NULL; struct rb_root *root; @@ -107,7 +112,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u struct fw_page *tfp; int i; - root = page_root_per_func_id(dev, func_id); + root = page_root_per_function(dev, function); if (IS_ERR(root)) return PTR_ERR(root); @@ -130,7 +135,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u nfp->addr = addr; nfp->page = page; - nfp->func_id = func_id; + nfp->function = function; nfp->free_count = MLX5_NUM_4K_IN_PAGE; for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++) set_bit(i, &nfp->bitmask); @@ -143,14 +148,14 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u } static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr, - u32 func_id) + u32 function) { struct fw_page *result = NULL; struct rb_root *root; struct rb_node *tmp; struct fw_page *tfp; - root = xa_load(&dev->priv.page_root_xa, func_id); + root = xa_load(&dev->priv.page_root_xa, function); if (WARN_ON_ONCE(!root)) return NULL; @@ -194,14 +199,14 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, return err; } -static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id) +static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u32 function) { struct fw_page *fp = NULL; struct fw_page *iter; unsigned n; list_for_each_entry(iter, &dev->priv.free_list, list) { - if (iter->func_id != func_id) + if (iter->function != function) continue; fp = iter; } @@ -231,7 +236,7 @@ static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp, { struct rb_root *root; - root = 
xa_load(&dev->priv.page_root_xa, fwp->func_id); + root = xa_load(&dev->priv.page_root_xa, fwp->function); if (WARN_ON_ONCE(!root)) return; @@ -244,12 +249,12 @@ static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp, kfree(fwp); } -static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 func_id) +static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) { struct fw_page *fwp; int n; - fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, func_id); + fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, function); if (!fwp) { mlx5_core_warn_rl(dev, "page not found\n"); return; @@ -263,7 +268,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 func_id) list_add(&fwp->list, &dev->priv.free_list); } -static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) +static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) { struct device *device = mlx5_core_dma_dev(dev); int nid = dev_to_node(device); @@ -291,7 +296,7 @@ static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) goto map; } - err = insert_page(dev, addr, page, func_id); + err = insert_page(dev, addr, page, function); if (err) { mlx5_core_err(dev, "failed to track allocated page\n"); dma_unmap_page(device, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -328,6 +333,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int notify_fail, bool ec_function) { + u32 function = get_function(func_id, ec_function); u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); u64 addr; @@ -345,10 +351,10 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, for (i = 0; i < npages; i++) { retry: - err = alloc_4k(dev, &addr, func_id); + err = alloc_4k(dev, &addr, function); if (err) { if (err == -ENOMEM) - err = alloc_system_page(dev, func_id); + err = alloc_system_page(dev, function); if (err) goto 
out_4k; @@ -384,7 +390,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, out_4k: for (i--; i >= 0; i--) - free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), func_id); + free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function); out_free: kvfree(in); if (notify_fail) @@ -392,14 +398,15 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, return err; } -static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id, +static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id, bool ec_function) { + u32 function = get_function(func_id, ec_function); struct rb_root *root; struct rb_node *p; int npages = 0; - root = xa_load(&dev->priv.page_root_xa, func_id); + root = xa_load(&dev->priv.page_root_xa, function); if (WARN_ON_ONCE(!root)) return; @@ -446,6 +453,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, struct rb_root *root; struct fw_page *fwp; struct rb_node *p; + bool ec_function; u32 func_id; u32 npages; u32 i = 0; @@ -456,8 +464,9 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, /* No hard feelings, we want our pages back! 
*/ npages = MLX5_GET(manage_pages_in, in, input_num_entries); func_id = MLX5_GET(manage_pages_in, in, function_id); + ec_function = MLX5_GET(manage_pages_in, in, embedded_cpu_function); - root = xa_load(&dev->priv.page_root_xa, func_id); + root = xa_load(&dev->priv.page_root_xa, get_function(func_id, ec_function)); if (WARN_ON_ONCE(!root)) return -EEXIST; @@ -473,9 +482,10 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, return 0; } -static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, +static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int *nclaimed, bool ec_function) { + u32 function = get_function(func_id, ec_function); int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {}; int num_claimed; @@ -514,7 +524,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, } for (i = 0; i < num_claimed; i++) - free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), func_id); + free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), function); if (nclaimed) *nclaimed = num_claimed; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 0fc7de4aa572fb..8e0dddc6383f07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -116,7 +116,7 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev) { mlx5_core_roce_gid_set(dev, 0, 0, 0, - NULL, NULL, false, 0, 0); + NULL, NULL, false, 0, 1); } static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 8fa286ccdd6bb2..bf85ce9835d7f9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -19,7 +19,7 @@ 
#define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ #define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ #define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ -#define MLXSW_THERMAL_ASIC_TEMP_CRIT 110000 /* 110C */ +#define MLXSW_THERMAL_ASIC_TEMP_CRIT 140000 /* 140C */ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME 16 @@ -176,6 +176,12 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, if (err) return err; + if (crit_temp > emerg_temp) { + dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n", + tz->tzdev->type, crit_temp, emerg_temp); + return 0; + } + /* According to the system thermal requirements, the thermal zones are * defined with four trip points. The critical and emergency * temperature thresholds, provided by QSFP module are set as "active" @@ -190,11 +196,8 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp; - if (emerg_temp > crit_temp) - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + + tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + MLXSW_THERMAL_MODULE_TEMP_SHIFT; - else - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index c6c5826aba41e8..1892cea05ee7cc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -157,6 +157,7 @@ mlxsw_sp1_span_entry_cpu_deconfigure(struct mlxsw_sp_span_entry *span_entry) static const struct mlxsw_sp_span_entry_ops mlxsw_sp1_span_entry_ops_cpu = { + .is_static = true, .can_handle = mlxsw_sp1_span_cpu_can_handle, 
.parms_set = mlxsw_sp1_span_entry_cpu_parms, .configure = mlxsw_sp1_span_entry_cpu_configure, @@ -214,6 +215,7 @@ mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry) static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = { + .is_static = true, .can_handle = mlxsw_sp_port_dev_check, .parms_set = mlxsw_sp_span_entry_phys_parms, .configure = mlxsw_sp_span_entry_phys_configure, @@ -721,6 +723,7 @@ mlxsw_sp2_span_entry_cpu_deconfigure(struct mlxsw_sp_span_entry *span_entry) static const struct mlxsw_sp_span_entry_ops mlxsw_sp2_span_entry_ops_cpu = { + .is_static = true, .can_handle = mlxsw_sp2_span_cpu_can_handle, .parms_set = mlxsw_sp2_span_entry_cpu_parms, .configure = mlxsw_sp2_span_entry_cpu_configure, @@ -1036,6 +1039,9 @@ static void mlxsw_sp_span_respin_work(struct work_struct *work) if (!refcount_read(&curr->ref_count)) continue; + if (curr->ops->is_static) + continue; + err = curr->ops->parms_set(mlxsw_sp, curr->to_dev, &sparms); if (err) continue; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index d907718bc8c584..aa1cd409c0e2ea 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -60,6 +60,7 @@ struct mlxsw_sp_span_entry { }; struct mlxsw_sp_span_entry_ops { + bool is_static; bool (*can_handle)(const struct net_device *to_dev); int (*parms_set)(struct mlxsw_sp *mlxsw_sp, const struct net_device *to_dev, diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index b319c22c211cd3..8947c3a6281094 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1962,6 +1962,14 @@ static struct sk_buff *lan743x_rx_allocate_skb(struct lan743x_rx *rx) length, GFP_ATOMIC | GFP_DMA); } +static void lan743x_rx_update_tail(struct lan743x_rx *rx, int index) +{ + /* update the tail 
once per 8 descriptors */ + if ((index & 7) == 7) + lan743x_csr_write(rx->adapter, RX_TAIL(rx->channel_number), + index); +} + static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index, struct sk_buff *skb) { @@ -1992,6 +2000,7 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index, descriptor->data0 = (RX_DESC_DATA0_OWN_ | (length & RX_DESC_DATA0_BUF_LENGTH_MASK_)); skb_reserve(buffer_info->skb, RX_HEAD_PADDING); + lan743x_rx_update_tail(rx, index); return 0; } @@ -2010,6 +2019,7 @@ static void lan743x_rx_reuse_ring_element(struct lan743x_rx *rx, int index) descriptor->data0 = (RX_DESC_DATA0_OWN_ | ((buffer_info->buffer_length) & RX_DESC_DATA0_BUF_LENGTH_MASK_)); + lan743x_rx_update_tail(rx, index); } static void lan743x_rx_release_ring_element(struct lan743x_rx *rx, int index) @@ -2220,6 +2230,7 @@ static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight) { struct lan743x_rx *rx = container_of(napi, struct lan743x_rx, napi); struct lan743x_adapter *adapter = rx->adapter; + int result = RX_PROCESS_RESULT_NOTHING_TO_DO; u32 rx_tail_flags = 0; int count; @@ -2228,27 +2239,19 @@ static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight) lan743x_csr_write(adapter, DMAC_INT_STS, DMAC_INT_BIT_RXFRM_(rx->channel_number)); } - count = 0; - while (count < weight) { - int rx_process_result = lan743x_rx_process_packet(rx); - - if (rx_process_result == RX_PROCESS_RESULT_PACKET_RECEIVED) { - count++; - } else if (rx_process_result == - RX_PROCESS_RESULT_NOTHING_TO_DO) { + for (count = 0; count < weight; count++) { + result = lan743x_rx_process_packet(rx); + if (result == RX_PROCESS_RESULT_NOTHING_TO_DO) break; - } else if (rx_process_result == - RX_PROCESS_RESULT_PACKET_DROPPED) { - continue; - } } rx->frame_count += count; - if (count == weight) - goto done; + if (count == weight || result == RX_PROCESS_RESULT_PACKET_RECEIVED) + return weight; if (!napi_complete_done(napi, count)) - goto done; + return count; + /* 
re-arm interrupts, must write to rx tail on some chip variants */ if (rx->vector_flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET) rx_tail_flags |= RX_TAIL_SET_TOP_INT_VEC_EN_; if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET) { @@ -2258,10 +2261,10 @@ static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight) INT_BIT_DMA_RX_(rx->channel_number)); } - /* update RX_TAIL */ - lan743x_csr_write(adapter, RX_TAIL(rx->channel_number), - rx_tail_flags | rx->last_tail); -done: + if (rx_tail_flags) + lan743x_csr_write(adapter, RX_TAIL(rx->channel_number), + rx_tail_flags | rx->last_tail); + return count; } @@ -2405,7 +2408,7 @@ static int lan743x_rx_open(struct lan743x_rx *rx) netif_napi_add(adapter->netdev, &rx->napi, lan743x_rx_napi_poll, - rx->ring_size - 1); + NAPI_POLL_WEIGHT); lan743x_csr_write(adapter, DMAC_CMD, DMAC_CMD_RX_SWR_(rx->channel_number)); diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index a53bd36b11c604..d4768dcb6c699b 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -60,14 +60,27 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port, const unsigned char mac[ETH_ALEN], unsigned int vid, enum macaccess_entry_type type) { + u32 cmd = ANA_TABLES_MACACCESS_VALID | + ANA_TABLES_MACACCESS_DEST_IDX(port) | + ANA_TABLES_MACACCESS_ENTRYTYPE(type) | + ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN); + unsigned int mc_ports; + + /* Set MAC_CPU_COPY if the CPU port is used by a multicast entry */ + if (type == ENTRYTYPE_MACv4) + mc_ports = (mac[1] << 8) | mac[2]; + else if (type == ENTRYTYPE_MACv6) + mc_ports = (mac[0] << 8) | mac[1]; + else + mc_ports = 0; + + if (mc_ports & BIT(ocelot->num_phys_ports)) + cmd |= ANA_TABLES_MACACCESS_MAC_CPU_COPY; + ocelot_mact_select(ocelot, mac, vid); /* Issue a write command */ - ocelot_write(ocelot, ANA_TABLES_MACACCESS_VALID | - ANA_TABLES_MACACCESS_DEST_IDX(port) | - ANA_TABLES_MACACCESS_ENTRYTYPE(type) | 
- ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN), - ANA_TABLES_MACACCESS); + ocelot_write(ocelot, cmd, ANA_TABLES_MACACCESS); return ocelot_mact_wait_for_completion(ocelot); } diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index b34da11acf65b4..d60cd4326f4cdd 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -952,10 +952,8 @@ static int ocelot_netdevice_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); int ret = 0; - if (!ocelot_netdevice_dev_check(dev)) - return 0; - if (event == NETDEV_PRECHANGEUPPER && + ocelot_netdevice_dev_check(dev) && netif_is_lag_master(info->upper_dev)) { struct netdev_lag_upper_info *lag_upper_info = info->upper_info; struct netlink_ext_ack *extack; diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 1e7729421a825f..9cf2bc5f428927 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1267,7 +1267,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) err = mscc_ocelot_init_ports(pdev, ports); if (err) - goto out_put_ports; + goto out_ocelot_deinit; if (ocelot->ptp) { err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info); @@ -1282,8 +1282,14 @@ static int mscc_ocelot_probe(struct platform_device *pdev) register_switchdev_notifier(&ocelot_switchdev_nb); register_switchdev_blocking_notifier(&ocelot_switchdev_blocking_nb); + of_node_put(ports); + dev_info(&pdev->dev, "Ocelot switch probed\n"); + return 0; + +out_ocelot_deinit: + ocelot_deinit(ocelot); out_put_ports: of_node_put(ports); return err; diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index 776b7d264dc34e..2289e1fe37419a 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -506,10 +506,14 @@ static int 
mac_sonic_platform_probe(struct platform_device *pdev) err = register_netdev(dev); if (err) - goto out; + goto undo_probe; return 0; +undo_probe: + dma_free_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); out: free_netdev(dev); @@ -584,12 +588,16 @@ static int mac_sonic_nubus_probe(struct nubus_board *board) err = register_netdev(ndev); if (err) - goto out; + goto undo_probe; nubus_set_drvdata(board, ndev); return 0; +undo_probe: + dma_free_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); out: free_netdev(ndev); return err; diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index afa166ff7aef5e..28d9e98db81a8b 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -229,11 +229,14 @@ int xtsonic_probe(struct platform_device *pdev) sonic_msg_init(dev); if ((err = register_netdev(dev))) - goto out1; + goto undo_probe1; return 0; -out1: +undo_probe1: + dma_free_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); release_region(dev->base_addr, SONIC_MEM_SIZE); out: free_netdev(dev); diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index bb448c82cdc28a..c029950a81e202 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -860,9 +860,6 @@ static void nfp_flower_clean(struct nfp_app *app) skb_queue_purge(&app_priv->cmsg_skbs_low); flush_work(&app_priv->cmsg_work); - flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app, - nfp_flower_setup_indr_tc_release); - if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) nfp_flower_qos_cleanup(app); @@ -951,6 +948,9 @@ static int nfp_flower_start(struct nfp_app *app) static void nfp_flower_stop(struct 
nfp_app *app) { nfp_tunnel_config_stop(app); + + flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app, + nfp_flower_setup_indr_tc_release); } static int diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index a12df3946a07cd..d0ae1cf43592df 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -123,6 +123,12 @@ static void ionic_link_status_check(struct ionic_lif *lif) link_up = link_status == IONIC_PORT_OPER_STATUS_UP; if (link_up) { + if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) { + mutex_lock(&lif->queue_lock); + ionic_start_queues(lif); + mutex_unlock(&lif->queue_lock); + } + if (!netif_carrier_ok(netdev)) { u32 link_speed; @@ -132,12 +138,6 @@ static void ionic_link_status_check(struct ionic_lif *lif) link_speed / 1000); netif_carrier_on(netdev); } - - if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) { - mutex_lock(&lif->queue_lock); - ionic_start_queues(lif); - mutex_unlock(&lif->queue_lock); - } } else { if (netif_carrier_ok(netdev)) { netdev_info(netdev, "Link down\n"); @@ -1129,38 +1129,10 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) lif->rx_mode = rx_mode; } -static void _ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode, - bool from_ndo) -{ - struct ionic_deferred_work *work; - - if (from_ndo) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - netdev_err(lif->netdev, "%s OOM\n", __func__); - return; - } - work->type = IONIC_DW_TYPE_RX_MODE; - work->rx_mode = rx_mode; - netdev_dbg(lif->netdev, "deferred: rx_mode\n"); - ionic_lif_deferred_enqueue(&lif->deferred, work); - } else { - ionic_lif_rx_mode(lif, rx_mode); - } -} - -static void ionic_dev_uc_sync(struct net_device *netdev, bool from_ndo) -{ - if (from_ndo) - __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); - else - __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); - 
-} - -static void ionic_set_rx_mode(struct net_device *netdev, bool from_ndo) +static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep) { struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_deferred_work *work; unsigned int nfilters; unsigned int rx_mode; @@ -1177,7 +1149,10 @@ static void ionic_set_rx_mode(struct net_device *netdev, bool from_ndo) * we remove our overflow flag and check the netdev flags * to see if we can disable NIC PROMISC */ - ionic_dev_uc_sync(netdev, from_ndo); + if (can_sleep) + __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); + else + __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); if (netdev_uc_count(netdev) + 1 > nfilters) { rx_mode |= IONIC_RX_MODE_F_PROMISC; @@ -1189,7 +1164,10 @@ static void ionic_set_rx_mode(struct net_device *netdev, bool from_ndo) } /* same for multicast */ - ionic_dev_uc_sync(netdev, from_ndo); + if (can_sleep) + __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); + else + __dev_mc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters); if (netdev_mc_count(netdev) > nfilters) { rx_mode |= IONIC_RX_MODE_F_ALLMULTI; @@ -1200,13 +1178,26 @@ static void ionic_set_rx_mode(struct net_device *netdev, bool from_ndo) rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI; } - if (lif->rx_mode != rx_mode) - _ionic_lif_rx_mode(lif, rx_mode, from_ndo); + if (lif->rx_mode != rx_mode) { + if (!can_sleep) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + netdev_err(lif->netdev, "%s OOM\n", __func__); + return; + } + work->type = IONIC_DW_TYPE_RX_MODE; + work->rx_mode = rx_mode; + netdev_dbg(lif->netdev, "deferred: rx_mode\n"); + ionic_lif_deferred_enqueue(&lif->deferred, work); + } else { + ionic_lif_rx_mode(lif, rx_mode); + } + } } static void ionic_ndo_set_rx_mode(struct net_device *netdev) { - ionic_set_rx_mode(netdev, true); + ionic_set_rx_mode(netdev, false); } 
static __le64 ionic_netdev_features_to_nic(netdev_features_t features) @@ -1773,7 +1764,7 @@ static int ionic_txrx_init(struct ionic_lif *lif) if (lif->netdev->features & NETIF_F_RXHASH) ionic_lif_rss_init(lif); - ionic_set_rx_mode(lif->netdev, false); + ionic_set_rx_mode(lif->netdev, true); return 0; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index b3d2250c77d049..a81feffb09b8b3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -337,7 +337,7 @@ void ionic_rx_fill(struct ionic_queue *q) unsigned int i, j; unsigned int len; - len = netdev->mtu + ETH_HLEN; + len = netdev->mtu + ETH_HLEN + VLAN_HLEN; nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE; for (i = ionic_q_space_avail(q); i; i--) { diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 4366c7a8de9515..6b5ddb07ee8331 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -78,6 +78,7 @@ config QED depends on PCI select ZLIB_INFLATE select CRC8 + select CRC32 select NET_DEVLINK help This enables the support for Marvell FastLinQ adapters family. diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index f21847739ef1fe..d258e0ccf94653 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -564,11 +564,6 @@ static const struct net_device_ops netxen_netdev_ops = { .ndo_set_features = netxen_set_features, }; -static inline bool netxen_function_zero(struct pci_dev *pdev) -{ - return (PCI_FUNC(pdev->devfn) == 0) ? 
true : false; -} - static inline void netxen_set_interrupt_mode(struct netxen_adapter *adapter, u32 mode) { @@ -664,7 +659,7 @@ static int netxen_setup_intr(struct netxen_adapter *adapter) netxen_initialize_interrupt_registers(adapter); netxen_set_msix_bit(pdev, 0); - if (netxen_function_zero(pdev)) { + if (adapter->portnum == 0) { if (!netxen_setup_msi_interrupts(adapter, num_msix)) netxen_set_interrupt_mode(adapter, NETXEN_MSI_MODE); else diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index a2494bf8500798..ca0ee29a57b50a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1799,6 +1799,11 @@ netdev_features_t qede_features_check(struct sk_buff *skb, ntohs(udp_hdr(skb)->dest) != gnv_port)) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } else if (l4_proto == IPPROTO_IPIP) { + /* IPIP tunnels are unknown to the device or at least unsupported natively, + * offloads for them can't be done trivially, so disable them for such skb. + */ + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 5a7e240fd46987..c2faf96fcade87 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2492,6 +2492,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) qlcnic_sriov_vf_register_map(ahw); break; default: + err = -EINVAL; goto err_out_free_hw_res; } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 85d9c3e30c6994..762cabf16157bd 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2243,7 +2243,8 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) } switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25 ... 
RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: case RTL_GIGA_MAC_VER_43: @@ -2269,7 +2270,8 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) static void rtl_pll_power_up(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: case RTL_GIGA_MAC_VER_43: diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index c6330463293526..d5d236d687e9e8 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2606,10 +2606,10 @@ static int sh_eth_close(struct net_device *ndev) /* Free all the skbuffs in the Rx queue and the DMA buffer. */ sh_eth_ring_free(ndev); - pm_runtime_put_sync(&mdp->pdev->dev); - mdp->is_opened = 0; + pm_runtime_put(&mdp->pdev->dev); + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index 82b1c7a5a7a948..ba0e4d2b256a4b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -129,7 +129,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Failed to set tx_clk\n"); - return ret; + goto err_remove_config_dt; } } } @@ -143,7 +143,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Failed to set clk_ptp_ref\n"); - return ret; + goto err_remove_config_dt; } } } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 81ee0a071b4e96..103d2448e9e0dd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -236,6 +236,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, int ret; int i; + plat->phy_addr = -1; plat->clk_csr = 5; plat->has_gmac = 0; plat->has_gmac4 = 1; @@ -345,7 +346,6 @@ static int ehl_sgmii_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 1; - plat->phy_addr = 0; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; plat->serdes_powerup = intel_serdes_powerup; @@ -362,7 +362,6 @@ static int ehl_rgmii_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 1; - plat->phy_addr = 0; plat->phy_interface = PHY_INTERFACE_MODE_RGMII; return ehl_common_data(pdev, plat); @@ -376,7 +375,7 @@ static int ehl_pse0_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 2; - plat->phy_addr = 1; + plat->addr64 = 32; return ehl_common_data(pdev, plat); } @@ -408,7 +407,7 @@ static int ehl_pse1_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 3; - plat->phy_addr = 1; + plat->addr64 = 32; return ehl_common_data(pdev, plat); } @@ -450,7 +449,6 @@ static int tgl_sgmii_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 1; - plat->phy_addr = 0; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; @@ -725,6 +723,8 @@ static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend, #define PCI_DEVICE_ID_INTEL_EHL_PSE1_RGMII1G_ID 0x4bb0 #define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII1G_ID 0x4bb1 #define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII2G5_ID 0x4bb2 +#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_0_ID 0x43ac +#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_1_ID 0x43a2 #define PCI_DEVICE_ID_INTEL_TGL_SGMII1G_ID 0xa0ac static const struct pci_device_id intel_eth_pci_id_table[] = { @@ -739,6 +739,8 @@ static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, 
EHL_PSE1_SGMII1G_ID, &ehl_pse1_sgmii1g_info) }, { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID, &ehl_pse1_sgmii1g_info) }, { PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_info) }, {} }; MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 6d6bd77bb6afce..9ddadae8e4c51e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -135,7 +135,7 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) struct device *dev = dwmac->dev; static const struct clk_parent_data mux_parents[] = { { .fw_name = "clkin0", }, - { .fw_name = "clkin1", }, + { .index = -1, }, }; static const struct clk_div_table div_table[] = { { .div = 2, .val = 2, }, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 58e0511badba8a..a5e0eff4a38741 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -64,6 +64,7 @@ struct emac_variant { * @variant: reference to the current board variant * @regmap: regmap for using the syscon * @internal_phy_powered: Does the internal PHY is enabled + * @use_internal_phy: Is the internal PHY selected for use * @mux_handle: Internal pointer used by mdio-mux lib */ struct sunxi_priv_data { @@ -74,6 +75,7 @@ struct sunxi_priv_data { const struct emac_variant *variant; struct regmap_field *regmap_field; bool internal_phy_powered; + bool use_internal_phy; void *mux_handle; }; @@ -539,8 +541,11 @@ static const struct stmmac_dma_ops sun8i_dwmac_dma_ops = { .dma_interrupt = sun8i_dwmac_dma_interrupt, }; +static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv); + static int 
sun8i_dwmac_init(struct platform_device *pdev, void *priv) { + struct net_device *ndev = platform_get_drvdata(pdev); struct sunxi_priv_data *gmac = priv; int ret; @@ -554,13 +559,25 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv) ret = clk_prepare_enable(gmac->tx_clk); if (ret) { - if (gmac->regulator) - regulator_disable(gmac->regulator); dev_err(&pdev->dev, "Could not enable AHB clock\n"); - return ret; + goto err_disable_regulator; + } + + if (gmac->use_internal_phy) { + ret = sun8i_dwmac_power_internal_phy(netdev_priv(ndev)); + if (ret) + goto err_disable_clk; } return 0; + +err_disable_clk: + clk_disable_unprepare(gmac->tx_clk); +err_disable_regulator: + if (gmac->regulator) + regulator_disable(gmac->regulator); + + return ret; } static void sun8i_dwmac_core_init(struct mac_device_info *hw, @@ -831,7 +848,6 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, struct sunxi_priv_data *gmac = priv->plat->bsp_priv; u32 reg, val; int ret = 0; - bool need_power_ephy = false; if (current_child ^ desired_child) { regmap_field_read(gmac->regmap_field, &reg); @@ -839,13 +855,12 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, case DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID: dev_info(priv->device, "Switch mux to internal PHY"); val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SELECT; - - need_power_ephy = true; + gmac->use_internal_phy = true; break; case DWMAC_SUN8I_MDIO_MUX_EXTERNAL_ID: dev_info(priv->device, "Switch mux to external PHY"); val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SHUTDOWN; - need_power_ephy = false; + gmac->use_internal_phy = false; break; default: dev_err(priv->device, "Invalid child ID %x\n", @@ -853,7 +868,7 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, return -EINVAL; } regmap_field_write(gmac->regmap_field, val); - if (need_power_ephy) { + if (gmac->use_internal_phy) { ret = sun8i_dwmac_power_internal_phy(priv); if (ret) return ret; @@ -883,22 +898,23 @@ 
static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv) return ret; } -static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) +static int sun8i_dwmac_set_syscon(struct device *dev, + struct plat_stmmacenet_data *plat) { - struct sunxi_priv_data *gmac = priv->plat->bsp_priv; - struct device_node *node = priv->device->of_node; + struct sunxi_priv_data *gmac = plat->bsp_priv; + struct device_node *node = dev->of_node; int ret; u32 reg, val; ret = regmap_field_read(gmac->regmap_field, &val); if (ret) { - dev_err(priv->device, "Fail to read from regmap field.\n"); + dev_err(dev, "Fail to read from regmap field.\n"); return ret; } reg = gmac->variant->default_syscon_value; if (reg != val) - dev_warn(priv->device, + dev_warn(dev, "Current syscon value is not the default %x (expect %x)\n", val, reg); @@ -911,9 +927,9 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) /* Force EPHY xtal frequency to 24MHz. */ reg |= H3_EPHY_CLK_SEL; - ret = of_mdio_parse_addr(priv->device, priv->plat->phy_node); + ret = of_mdio_parse_addr(dev, plat->phy_node); if (ret < 0) { - dev_err(priv->device, "Could not parse MDIO addr\n"); + dev_err(dev, "Could not parse MDIO addr\n"); return ret; } /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY @@ -929,17 +945,17 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) { if (val % 100) { - dev_err(priv->device, "tx-delay must be a multiple of 100\n"); + dev_err(dev, "tx-delay must be a multiple of 100\n"); return -EINVAL; } val /= 100; - dev_dbg(priv->device, "set tx-delay to %x\n", val); + dev_dbg(dev, "set tx-delay to %x\n", val); if (val <= gmac->variant->tx_delay_max) { reg &= ~(gmac->variant->tx_delay_max << SYSCON_ETXDC_SHIFT); reg |= (val << SYSCON_ETXDC_SHIFT); } else { - dev_err(priv->device, "Invalid TX clock delay: %d\n", + dev_err(dev, "Invalid TX clock delay: %d\n", val); return -EINVAL; } @@ -947,17 +963,17 @@ static int 
sun8i_dwmac_set_syscon(struct stmmac_priv *priv) if (!of_property_read_u32(node, "allwinner,rx-delay-ps", &val)) { if (val % 100) { - dev_err(priv->device, "rx-delay must be a multiple of 100\n"); + dev_err(dev, "rx-delay must be a multiple of 100\n"); return -EINVAL; } val /= 100; - dev_dbg(priv->device, "set rx-delay to %x\n", val); + dev_dbg(dev, "set rx-delay to %x\n", val); if (val <= gmac->variant->rx_delay_max) { reg &= ~(gmac->variant->rx_delay_max << SYSCON_ERXDC_SHIFT); reg |= (val << SYSCON_ERXDC_SHIFT); } else { - dev_err(priv->device, "Invalid RX clock delay: %d\n", + dev_err(dev, "Invalid RX clock delay: %d\n", val); return -EINVAL; } @@ -968,7 +984,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) if (gmac->variant->support_rmii) reg &= ~SYSCON_RMII_EN; - switch (priv->plat->interface) { + switch (plat->interface) { case PHY_INTERFACE_MODE_MII: /* default */ break; @@ -982,8 +998,8 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) reg |= SYSCON_RMII_EN | SYSCON_ETCS_EXT_GMII; break; default: - dev_err(priv->device, "Unsupported interface mode: %s", - phy_modes(priv->plat->interface)); + dev_err(dev, "Unsupported interface mode: %s", + phy_modes(plat->interface)); return -EINVAL; } @@ -1004,17 +1020,10 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) struct sunxi_priv_data *gmac = priv; if (gmac->variant->soc_has_internal_phy) { - /* sun8i_dwmac_exit could be called with mdiomux uninit */ - if (gmac->mux_handle) - mdio_mux_uninit(gmac->mux_handle); if (gmac->internal_phy_powered) sun8i_dwmac_unpower_internal_phy(gmac); } - sun8i_dwmac_unset_syscon(gmac); - - reset_control_put(gmac->rst_ephy); - clk_disable_unprepare(gmac->tx_clk); if (gmac->regulator) @@ -1049,16 +1058,11 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) { struct mac_device_info *mac; struct stmmac_priv *priv = ppriv; - int ret; mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL); if (!mac) return NULL; - ret = 
sun8i_dwmac_set_syscon(priv); - if (ret) - return NULL; - mac->pcsr = priv->ioaddr; mac->mac = &sun8i_dwmac_ops; mac->dma = &sun8i_dwmac_dma_ops; @@ -1134,10 +1138,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL); if (!gmac) return -ENOMEM; @@ -1201,11 +1201,15 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) ret = of_get_phy_mode(dev->of_node, &interface); if (ret) return -EINVAL; - plat_dat->interface = interface; + + plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + if (IS_ERR(plat_dat)) + return PTR_ERR(plat_dat); /* platform data specifying hardware features and callbacks. * hardware features were copied from Allwinner drivers. */ + plat_dat->interface = interface; plat_dat->rx_coe = STMMAC_RX_COE_TYPE2; plat_dat->tx_coe = 1; plat_dat->has_sun8i = true; @@ -1214,9 +1218,13 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) plat_dat->exit = sun8i_dwmac_exit; plat_dat->setup = sun8i_dwmac_setup; + ret = sun8i_dwmac_set_syscon(&pdev->dev, plat_dat); + if (ret) + goto dwmac_deconfig; + ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv); if (ret) - return ret; + goto dwmac_syscon; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) @@ -1230,7 +1238,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) if (gmac->variant->soc_has_internal_phy) { ret = get_ephy_nodes(priv); if (ret) - goto dwmac_exit; + goto dwmac_remove; ret = sun8i_dwmac_register_mdio_mux(priv); if (ret) { dev_err(&pdev->dev, "Failed to register mux\n"); @@ -1239,15 +1247,42 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) } else { ret = sun8i_dwmac_reset(priv); if (ret) - goto dwmac_exit; + goto dwmac_remove; } return ret; dwmac_mux: - sun8i_dwmac_unset_syscon(gmac); + reset_control_put(gmac->rst_ephy); + clk_put(gmac->ephy_clk); 
+dwmac_remove: + stmmac_dvr_remove(&pdev->dev); dwmac_exit: + sun8i_dwmac_exit(pdev, gmac); +dwmac_syscon: + sun8i_dwmac_unset_syscon(gmac); +dwmac_deconfig: + stmmac_remove_config_dt(pdev, plat_dat); + + return ret; +} + +static int sun8i_dwmac_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(ndev); + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + + if (gmac->variant->soc_has_internal_phy) { + mdio_mux_uninit(gmac->mux_handle); + sun8i_dwmac_unpower_internal_phy(gmac); + reset_control_put(gmac->rst_ephy); + clk_put(gmac->ephy_clk); + } + stmmac_pltfr_remove(pdev); -return ret; + sun8i_dwmac_unset_syscon(gmac); + + return 0; } static const struct of_device_id sun8i_dwmac_match[] = { @@ -1269,7 +1304,7 @@ MODULE_DEVICE_TABLE(of, sun8i_dwmac_match); static struct platform_driver sun8i_dwmac_driver = { .probe = sun8i_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove = sun8i_dwmac_remove, .driver = { .name = "dwmac-sun8i", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 67ba67ed0cb99f..de5255b951e147 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -572,68 +572,24 @@ static int dwmac5_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, unsigned int ptp_rate) { - u32 speed, total_offset, offset, ctrl, ctr_low; - u32 extcfg = readl(ioaddr + GMAC_EXT_CONFIG); - u32 mac_cfg = readl(ioaddr + GMAC_CONFIG); int i, ret = 0x0; - u64 total_ctr; - - if (extcfg & GMAC_CONFIG_EIPG_EN) { - offset = (extcfg & GMAC_CONFIG_EIPG) >> GMAC_CONFIG_EIPG_SHIFT; - offset = 104 + (offset * 8); - } else { - offset = (mac_cfg & GMAC_CONFIG_IPG) >> GMAC_CONFIG_IPG_SHIFT; - offset = 96 - (offset * 8); - } - - speed = mac_cfg & (GMAC_CONFIG_PS | GMAC_CONFIG_FES); - 
speed = speed >> GMAC_CONFIG_FES_SHIFT; - - switch (speed) { - case 0x0: - offset = offset * 1000; /* 1G */ - break; - case 0x1: - offset = offset * 400; /* 2.5G */ - break; - case 0x2: - offset = offset * 100000; /* 10M */ - break; - case 0x3: - offset = offset * 10000; /* 100M */ - break; - default: - return -EINVAL; - } - - offset = offset / 1000; + u32 ctrl; ret |= dwmac5_est_write(ioaddr, BTR_LOW, cfg->btr[0], false); ret |= dwmac5_est_write(ioaddr, BTR_HIGH, cfg->btr[1], false); ret |= dwmac5_est_write(ioaddr, TER, cfg->ter, false); ret |= dwmac5_est_write(ioaddr, LLR, cfg->gcl_size, false); + ret |= dwmac5_est_write(ioaddr, CTR_LOW, cfg->ctr[0], false); + ret |= dwmac5_est_write(ioaddr, CTR_HIGH, cfg->ctr[1], false); if (ret) return ret; - total_offset = 0; for (i = 0; i < cfg->gcl_size; i++) { - ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i] + offset, true); + ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i], true); if (ret) return ret; - - total_offset += offset; } - total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000ULL; - total_ctr += total_offset; - - ctr_low = do_div(total_ctr, 1000000000); - - ret |= dwmac5_est_write(ioaddr, CTR_LOW, ctr_low, false); - ret |= dwmac5_est_write(ioaddr, CTR_HIGH, total_ctr, false); - if (ret) - return ret; - ctrl = readl(ioaddr + MTL_EST_CONTROL); ctrl &= ~PTOV; ctrl |= ((1000000000 / ptp_rate) * 6) << PTOV_SHIFT; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c33db79cdd0ad1..b3d6d8e3f4de90 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2158,7 +2158,7 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule_irqoff(&ch->rx_napi); + __napi_schedule(&ch->rx_napi); } } @@ -2167,7 +2167,7 @@ static int 
stmmac_napi_check(struct stmmac_priv *priv, u32 chan) spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule_irqoff(&ch->tx_napi); + __napi_schedule(&ch->tx_napi); } } @@ -3996,6 +3996,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) { struct stmmac_priv *priv = netdev_priv(dev); int txfifosz = priv->plat->tx_fifo_size; + const int mtu = new_mtu; if (txfifosz == 0) txfifosz = priv->dma_cap.tx_fifo_size; @@ -4013,7 +4014,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) if ((txfifosz < new_mtu) || (new_mtu > BUF_SIZE_16KiB)) return -EINVAL; - dev->mtu = new_mtu; + dev->mtu = mtu; netdev_update_features(dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index cc27d660a81851..06553d028d746b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -605,7 +605,8 @@ static int tc_setup_taprio(struct stmmac_priv *priv, { u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; struct plat_stmmacenet_data *plat = priv->plat; - struct timespec64 time; + struct timespec64 time, current_time; + ktime_t current_time_ns; bool fpe = false; int i, ret = 0; u64 ctr; @@ -700,7 +701,22 @@ static int tc_setup_taprio(struct stmmac_priv *priv, } /* Adjust for real system time */ - time = ktime_to_timespec64(qopt->base_time); + priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, &current_time); + current_time_ns = timespec64_to_ktime(current_time); + if (ktime_after(qopt->base_time, current_time_ns)) { + time = ktime_to_timespec64(qopt->base_time); + } else { + ktime_t base_time; + s64 n; + + n = div64_s64(ktime_sub_ns(current_time_ns, qopt->base_time), + qopt->cycle_time); + base_time = ktime_add_ns(qopt->base_time, + (n + 1) * qopt->cycle_time); + + time = ktime_to_timespec64(base_time); + } + priv->plat->est->btr[0] = 
(u32)time.tv_nsec; priv->plat->est->btr[1] = (u32)time.tv_sec; diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index d1fc7955d42274..43222a34cba069 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -599,6 +599,7 @@ void cpts_unregister(struct cpts *cpts) ptp_clock_unregister(cpts->clock); cpts->clock = NULL; + cpts->phc_index = -1; cpts_write32(cpts, 0, int_enable); cpts_write32(cpts, 0, control); @@ -784,6 +785,7 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs, cpts->cc.read = cpts_systim_read; cpts->cc.mask = CLOCKSOURCE_MASK(32); cpts->info = cpts_info; + cpts->phc_index = -1; if (n_ext_ts) cpts->info.n_ext_ts = n_ext_ts; diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index e34fe2d77324eb..9b08eb82398463 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -216,6 +216,7 @@ int ipa_modem_start(struct ipa *ipa) ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; + SET_NETDEV_DEV(netdev, &ipa->pdev->dev); priv = netdev_priv(netdev); priv->ipa = ipa; diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 0fc39ac5ca88b9..10722fed666dec 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -284,7 +284,8 @@ static int smsc_phy_probe(struct phy_device *phydev) /* Make clk optional to keep DTB backward compatibility. 
*/ priv->refclk = clk_get_optional(dev, NULL); if (IS_ERR(priv->refclk)) - dev_err_probe(dev, PTR_ERR(priv->refclk), "Failed to request clock\n"); + return dev_err_probe(dev, PTR_ERR(priv->refclk), + "Failed to request clock\n"); ret = clk_prepare_enable(priv->refclk); if (ret) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 07f1f39339271e..615f3776b4bee6 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -991,7 +991,8 @@ static void __team_compute_features(struct team *team) unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; - list_for_each_entry(port, &team->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &team->port_list, list) { vlan_features = netdev_increment_features(vlan_features, port->dev->vlan_features, TEAM_VLAN_FEATURES); @@ -1005,6 +1006,7 @@ static void __team_compute_features(struct team *team) if (port->dev->hard_header_len > max_hard_header_len) max_hard_header_len = port->dev->hard_header_len; } + rcu_read_unlock(); team->dev->vlan_features = vlan_features; team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | @@ -1020,9 +1022,7 @@ static void __team_compute_features(struct team *team) static void team_compute_features(struct team *team) { - mutex_lock(&team->lock); __team_compute_features(team); - mutex_unlock(&team->lock); netdev_change_features(team->dev); } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index cd06cae760356f..1ac80756e5afa5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1401,7 +1401,7 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, int i; if (it->nr_segs > MAX_SKB_FRAGS + 1) - return ERR_PTR(-ENOMEM); + return ERR_PTR(-EMSGSIZE); local_bh_disable(); skb = napi_get_frags(&tfile->napi); diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 8c1d61c2cbacbc..6aaa0675c28a39 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -793,6 +793,13 
@@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Lenovo Powered USB-C Travel Hub (4X90S92381, based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x721e, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index e04f588538ccbd..854c6624e68597 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1199,7 +1199,10 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) * accordingly. Otherwise, we should check here. */ if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) - delayed_ndp_size = ALIGN(ctx->max_ndp_size, ctx->tx_ndp_modulus); + delayed_ndp_size = ctx->max_ndp_size + + max_t(u32, + ctx->tx_ndp_modulus, + ctx->tx_modulus + ctx->tx_remainder) - 1; else delayed_ndp_size = 0; @@ -1410,7 +1413,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) if (!(dev->driver_info->flags & FLAG_SEND_ZLP) && skb_out->len > ctx->min_tx_pkt) { padding_count = ctx->tx_curr_size - skb_out->len; - skb_put_zero(skb_out, padding_count); + if (!WARN_ON(padding_count > ctx->tx_curr_size)) + skb_put_zero(skb_out, padding_count); } else if (skb_out->len < ctx->tx_curr_size && (skb_out->len % dev->maxpacket) == 0) { skb_put_u8(skb_out, 0); /* force short packet */ @@ -1863,9 +1867,6 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE. */ - netif_info(dev, link, dev->net, - "network connection: %sconnected\n", - !!event->wValue ? 
"" : "dis"); usbnet_link_change(dev, !!event->wValue, 0); break; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index fc378ff56775bc..ce73df4c137ea6 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1036,6 +1036,7 @@ static const struct usb_device_id products[] = { {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0620)}, /* Quectel EM160R-GL */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ /* 3. Combined interface devices matching on interface number */ @@ -1324,6 +1325,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0b3c, 0xc00a, 6)}, /* Olivetti Olicard 160 */ {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ + {QMI_QUIRK_SET_DTR(0x1e2d, 0x006f, 8)}, /* Cinterion PLS83/PLS63 */ {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index b1770489aca519..88f177aca342e8 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6893,6 +6893,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x721e)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387)}, {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 6fa7a009a24a48..f9b359d4e29398 100644 --- a/drivers/net/usb/rndis_host.c +++ 
b/drivers/net/usb/rndis_host.c @@ -387,7 +387,7 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags) reply_len = sizeof *phym; retval = rndis_query(dev, intf, u.buf, RNDIS_OID_GEN_PHYSICAL_MEDIUM, - 0, (void **) &phym, &reply_len); + reply_len, (void **)&phym, &reply_len); if (retval != 0 || !phym) { /* OID is optional so don't fail here. */ phym_unspec = cpu_to_le32(RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 21b71148c53241..a93ab630f1c287 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1259,9 +1259,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { unsigned long flags; + local_bh_disable(); flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); rq->stats.kicks++; u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); + local_bh_enable(); } return !oom; @@ -2093,14 +2095,16 @@ static int virtnet_set_channels(struct net_device *dev, get_online_cpus(); err = _virtnet_set_queues(vi, queue_pairs); - if (!err) { - netif_set_real_num_tx_queues(dev, queue_pairs); - netif_set_real_num_rx_queues(dev, queue_pairs); - - virtnet_set_affinity(vi); + if (err) { + put_online_cpus(); + goto err; } + virtnet_set_affinity(vi); put_online_cpus(); + netif_set_real_num_tx_queues(dev, queue_pairs); + netif_set_real_num_rx_queues(dev, queue_pairs); + err: return err; } @@ -3072,6 +3076,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "device MTU appears to have changed it is now %d < %d", mtu, dev->min_mtu); + err = -EINVAL; goto free; } diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 39e5ab261d7ce0..4be2a5cf022c83 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -282,6 +282,7 @@ config SLIC_DS26522 tristate "Slic Maxim ds26522 card support" depends on SPI depends on FSL_SOC || ARCH_MXC || 
ARCH_LAYERSCAPE || COMPILE_TEST + select BITREVERSE help This module initializes and configures the slic maxim card in T1 or E1 mode. diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 64f85565133696..261b53fc8e04cb 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -569,6 +569,13 @@ static void ppp_timer(struct timer_list *t) unsigned long flags; spin_lock_irqsave(&ppp->lock, flags); + /* mod_timer could be called after we entered this function but + * before we got the lock. + */ + if (timer_pending(&proto->timer)) { + spin_unlock_irqrestore(&ppp->lock, flags); + return; + } switch (proto->state) { case STOPPING: case REQ_SENT: diff --git a/drivers/net/wireless/admtek/adm8211.c b/drivers/net/wireless/admtek/adm8211.c index 5cf2045fadeff3..c41e72508d3dbf 100644 --- a/drivers/net/wireless/admtek/adm8211.c +++ b/drivers/net/wireless/admtek/adm8211.c @@ -1796,6 +1796,7 @@ static int adm8211_probe(struct pci_dev *pdev, if (io_len < 256 || mem_len < 1024) { printk(KERN_ERR "%s (adm8211): Too short PCI resources\n", pci_name(pdev)); + err = -ENOMEM; goto err_disable_pdev; } @@ -1805,6 +1806,7 @@ static int adm8211_probe(struct pci_dev *pdev, if (reg != ADM8211_SIG1 && reg != ADM8211_SIG2) { printk(KERN_ERR "%s (adm8211): Invalid signature (0x%x)\n", pci_name(pdev), reg); + err = -EINVAL; goto err_disable_pdev; } @@ -1815,8 +1817,8 @@ static int adm8211_probe(struct pci_dev *pdev, return err; /* someone else grabbed it? 
don't disable it */ } - if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) || - dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { printk(KERN_ERR "%s (adm8211): No suitable DMA available\n", pci_name(pdev)); goto err_free_reg; diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c index 05a620ff6fe2cc..19b9c27e30e209 100644 --- a/drivers/net/wireless/ath/ath10k/usb.c +++ b/drivers/net/wireless/ath/ath10k/usb.c @@ -997,6 +997,8 @@ static int ath10k_usb_probe(struct usb_interface *interface, ar_usb = ath10k_usb_priv(ar); ret = ath10k_usb_create(ar, interface); + if (ret) + goto err; ar_usb->ar = ar; ar->dev_id = product_id; @@ -1009,7 +1011,7 @@ static int ath10k_usb_probe(struct usb_interface *interface, ret = ath10k_core_register(ar, &bus_params); if (ret) { ath10k_warn(ar, "failed to register driver core: %d\n", ret); - goto err; + goto err_usb_destroy; } /* TODO: remove this once USB support is fully implemented */ @@ -1017,6 +1019,9 @@ static int ath10k_usb_probe(struct usb_interface *interface, return 0; +err_usb_destroy: + ath10k_usb_destroy(ar); + err: ath10k_core_destroy(ar); diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index 932266d1111bd4..7b5834157fe512 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -1401,13 +1401,15 @@ static int ath10k_wmi_tlv_svc_avail_parse(struct ath10k *ar, u16 tag, u16 len, switch (tag) { case WMI_TLV_TAG_STRUCT_SERVICE_AVAILABLE_EVENT: + arg->service_map_ext_valid = true; arg->service_map_ext_len = *(__le32 *)ptr; arg->service_map_ext = ptr + sizeof(__le32); return 0; default: break; } - return -EPROTO; + + return 0; } static int ath10k_wmi_tlv_op_pull_svc_avail(struct ath10k *ar, diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 1fa7107a505151..37b53af760d766 
100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -5751,8 +5751,13 @@ void ath10k_wmi_event_service_available(struct ath10k *ar, struct sk_buff *skb) ret); } - ath10k_wmi_map_svc_ext(ar, arg.service_map_ext, ar->wmi.svc_map, - __le32_to_cpu(arg.service_map_ext_len)); + /* + * Initialization of "arg.service_map_ext_valid" to ZERO is necessary + * for the below logic to work. + */ + if (arg.service_map_ext_valid) + ath10k_wmi_map_svc_ext(ar, arg.service_map_ext, ar->wmi.svc_map, + __le32_to_cpu(arg.service_map_ext_len)); } static int ath10k_wmi_event_temperature(struct ath10k *ar, struct sk_buff *skb) diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 4898e19b0af656..66ecf09068c197 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -6917,6 +6917,7 @@ struct wmi_svc_rdy_ev_arg { }; struct wmi_svc_avail_ev_arg { + bool service_map_ext_valid; __le32 service_map_ext_len; const __le32 *service_map_ext; }; diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 18b97420f0d8a3..5a7915f75e1e29 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -75,12 +75,14 @@ static inline enum wme_ac ath11k_tid_to_ac(u32 tid) enum ath11k_skb_flags { ATH11K_SKB_HW_80211_ENCAP = BIT(0), + ATH11K_SKB_CIPHER_SET = BIT(1), }; struct ath11k_skb_cb { dma_addr_t paddr; u8 eid; u8 flags; + u32 cipher; struct ath11k *ar; struct ieee80211_vif *vif; } __packed; diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 01625327eef7ce..3638501a095933 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -2272,6 +2272,7 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc, { u8 channel_num; u32 center_freq; + struct ieee80211_channel *channel; 
rx_status->freq = 0; rx_status->rate_idx = 0; @@ -2292,9 +2293,12 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc, rx_status->band = NL80211_BAND_5GHZ; } else { spin_lock_bh(&ar->data_lock); - rx_status->band = ar->rx_channel->band; - channel_num = - ieee80211_frequency_to_channel(ar->rx_channel->center_freq); + channel = ar->rx_channel; + if (channel) { + rx_status->band = channel->band; + channel_num = + ieee80211_frequency_to_channel(channel->center_freq); + } spin_unlock_bh(&ar->data_lock); ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "rx_desc: ", rx_desc, sizeof(struct hal_rx_desc)); diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c index 3d962eee4d61dd..21dfd08d3debb6 100644 --- a/drivers/net/wireless/ath/ath11k/dp_tx.c +++ b/drivers/net/wireless/ath/ath11k/dp_tx.c @@ -84,7 +84,6 @@ int ath11k_dp_tx(struct ath11k *ar, struct ath11k_vif *arvif, struct ath11k_dp *dp = &ab->dp; struct hal_tx_info ti = {0}; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_key_conf *key = info->control.hw_key; struct ath11k_skb_cb *skb_cb = ATH11K_SKB_CB(skb); struct hal_srng *tcl_ring; struct ieee80211_hdr *hdr = (void *)skb->data; @@ -149,9 +148,9 @@ int ath11k_dp_tx(struct ath11k *ar, struct ath11k_vif *arvif, ti.meta_data_flags = arvif->tcl_metadata; if (ti.encap_type == HAL_TCL_ENCAP_TYPE_RAW) { - if (key) { + if (skb_cb->flags & ATH11K_SKB_CIPHER_SET) { ti.encrypt_type = - ath11k_dp_tx_get_encrypt_type(key->cipher); + ath11k_dp_tx_get_encrypt_type(skb_cb->cipher); if (ieee80211_has_protected(hdr->frame_control)) skb_put(skb, IEEE80211_CCMP_MIC_LEN); diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c index 11a411b76fe42a..66331da3501291 100644 --- a/drivers/net/wireless/ath/ath11k/hw.c +++ b/drivers/net/wireless/ath/ath11k/hw.c @@ -127,7 +127,7 @@ static void ath11k_init_wmi_config_ipq8074(struct ath11k_base *ab, 
config->beacon_tx_offload_max_vdev = ab->num_radios * TARGET_MAX_BCN_OFFLD; config->rx_batchmode = TARGET_RX_BATCHMODE; config->peer_map_unmap_v2_support = 1; - config->twt_ap_pdev_count = 2; + config->twt_ap_pdev_count = ab->num_radios; config->twt_ap_sta_count = 1000; } @@ -157,7 +157,7 @@ static int ath11k_hw_mac_id_to_srng_id_qca6390(struct ath11k_hw_params *hw, const struct ath11k_hw_ops ipq8074_ops = { .get_hw_mac_from_pdev_id = ath11k_hw_ipq8074_mac_from_pdev_id, - .wmi_init_config = ath11k_init_wmi_config_qca6390, + .wmi_init_config = ath11k_init_wmi_config_ipq8074, .mac_id_to_pdev_id = ath11k_hw_mac_id_to_pdev_id_ipq8074, .mac_id_to_srng_id = ath11k_hw_mac_id_to_srng_id_ipq8074, }; diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 7f8dd47d233337..af427d9051a07e 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3977,21 +3977,20 @@ static void ath11k_mgmt_over_wmi_tx_purge(struct ath11k *ar) static void ath11k_mgmt_over_wmi_tx_work(struct work_struct *work) { struct ath11k *ar = container_of(work, struct ath11k, wmi_mgmt_tx_work); - struct ieee80211_tx_info *info; + struct ath11k_skb_cb *skb_cb; struct ath11k_vif *arvif; struct sk_buff *skb; int ret; while ((skb = skb_dequeue(&ar->wmi_mgmt_tx_queue)) != NULL) { - info = IEEE80211_SKB_CB(skb); - if (!info->control.vif) { - ath11k_warn(ar->ab, "no vif found for mgmt frame, flags 0x%x\n", - info->control.flags); + skb_cb = ATH11K_SKB_CB(skb); + if (!skb_cb->vif) { + ath11k_warn(ar->ab, "no vif found for mgmt frame\n"); ieee80211_free_txskb(ar->hw, skb); continue; } - arvif = ath11k_vif_to_arvif(info->control.vif); + arvif = ath11k_vif_to_arvif(skb_cb->vif); if (ar->allocated_vdev_map & (1LL << arvif->vdev_id) && arvif->is_started) { ret = ath11k_mac_mgmt_tx_wmi(ar, arvif, skb); @@ -4004,8 +4003,8 @@ static void ath11k_mgmt_over_wmi_tx_work(struct work_struct *work) } } else { ath11k_warn(ar->ab, - "dropping mgmt 
frame for vdev %d, flags 0x%x is_started %d\n", - arvif->vdev_id, info->control.flags, + "dropping mgmt frame for vdev %d, is_started %d\n", + arvif->vdev_id, arvif->is_started); ieee80211_free_txskb(ar->hw, skb); } @@ -4053,10 +4052,20 @@ static void ath11k_mac_op_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif = info->control.vif; struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_key_conf *key = info->control.hw_key; + u32 info_flags = info->flags; bool is_prb_rsp; int ret; - if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) { + memset(skb_cb, 0, sizeof(*skb_cb)); + skb_cb->vif = vif; + + if (key) { + skb_cb->cipher = key->cipher; + skb_cb->flags |= ATH11K_SKB_CIPHER_SET; + } + + if (info_flags & IEEE80211_TX_CTL_HW_80211_ENCAP) { skb_cb->flags |= ATH11K_SKB_HW_80211_ENCAP; } else if (ieee80211_is_mgmt(hdr->frame_control)) { is_prb_rsp = ieee80211_is_probe_resp(hdr->frame_control); @@ -4094,7 +4103,8 @@ static int ath11k_mac_config_mon_status_default(struct ath11k *ar, bool enable) if (enable) { tlv_filter = ath11k_mac_mon_status_filter_default; - tlv_filter.rx_filter = ath11k_debugfs_rx_filter(ar); + if (ath11k_debugfs_rx_filter(ar)) + tlv_filter.rx_filter = ath11k_debugfs_rx_filter(ar); } for (i = 0; i < ab->hw_params.num_rxmda_per_pdev; i++) { @@ -5225,20 +5235,26 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, arvif->vdev_type != WMI_VDEV_TYPE_AP && arvif->vdev_type != WMI_VDEV_TYPE_MONITOR) { memcpy(&arvif->chanctx, ctx, sizeof(*ctx)); - mutex_unlock(&ar->conf_mutex); - return 0; + ret = 0; + goto out; } if (WARN_ON(arvif->is_started)) { - mutex_unlock(&ar->conf_mutex); - return -EBUSY; + ret = -EBUSY; + goto out; } if (ab->hw_params.vdev_start_delay) { param.vdev_id = arvif->vdev_id; param.peer_type = WMI_PEER_TYPE_DEFAULT; param.peer_addr = ar->mac_addr; + ret = ath11k_peer_create(ar, arvif, NULL, &param); + if (ret) { + ath11k_warn(ab, "failed to create 
peer after vdev start delay: %d", + ret); + goto out; + } } ret = ath11k_mac_vdev_start(arvif, &ctx->def); @@ -5246,23 +5262,21 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, ath11k_warn(ab, "failed to start vdev %i addr %pM on freq %d: %d\n", arvif->vdev_id, vif->addr, ctx->def.chan->center_freq, ret); - goto err; + goto out; } if (arvif->vdev_type == WMI_VDEV_TYPE_MONITOR) { ret = ath11k_monitor_vdev_up(ar, arvif->vdev_id); if (ret) - goto err; + goto out; } arvif->is_started = true; /* TODO: Setup ps and cts/rts protection */ - mutex_unlock(&ar->conf_mutex); - - return 0; + ret = 0; -err: +out: mutex_unlock(&ar->conf_mutex); return ret; diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index c2b16515822592..2ae7c6bf091e9b 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -1585,15 +1585,17 @@ static int ath11k_qmi_fw_ind_register_send(struct ath11k_base *ab) struct qmi_wlanfw_ind_register_resp_msg_v01 *resp; struct qmi_handle *handle = &ab->qmi.handle; struct qmi_txn txn; - int ret = 0; + int ret; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; resp = kzalloc(sizeof(*resp), GFP_KERNEL); - if (!resp) + if (!resp) { + ret = -ENOMEM; goto resp_out; + } req->client_id_valid = 1; req->client_id = QMI_WLANFW_CLIENT_ID; @@ -1652,6 +1654,7 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab) struct qmi_wlanfw_respond_mem_resp_msg_v01 resp; struct qmi_txn txn = {}; int ret = 0, i; + bool delayed; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) @@ -1664,11 +1667,13 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab) * failure to FW and FW will then request mulitple blocks of small * chunk size memory. 
*/ - if (!ab->bus_params.fixed_mem_region && ab->qmi.mem_seg_count <= 2) { + if (!ab->bus_params.fixed_mem_region && ab->qmi.target_mem_delayed) { + delayed = true; ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi delays mem_request %d\n", ab->qmi.mem_seg_count); memset(req, 0, sizeof(*req)); } else { + delayed = false; req->mem_seg_len = ab->qmi.mem_seg_count; for (i = 0; i < req->mem_seg_len ; i++) { @@ -1700,6 +1705,12 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab) } if (resp.resp.result != QMI_RESULT_SUCCESS_V01) { + /* the error response is expected when + * target_mem_delayed is true. + */ + if (delayed && resp.resp.error == 0) + goto out; + ath11k_warn(ab, "Respond mem req failed, result: %d, err: %d\n", resp.resp.result, resp.resp.error); ret = -EINVAL; @@ -1734,6 +1745,8 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab) int i; struct target_mem_chunk *chunk; + ab->qmi.target_mem_delayed = false; + for (i = 0; i < ab->qmi.mem_seg_count; i++) { chunk = &ab->qmi.target_mem[i]; chunk->vaddr = dma_alloc_coherent(ab->dev, @@ -1741,6 +1754,15 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab) &chunk->paddr, GFP_KERNEL); if (!chunk->vaddr) { + if (ab->qmi.mem_seg_count <= 2) { + ath11k_dbg(ab, ATH11K_DBG_QMI, + "qmi dma allocation failed (%d B type %u), will try later with small size\n", + chunk->size, + chunk->type); + ath11k_qmi_free_target_mem_chunk(ab); + ab->qmi.target_mem_delayed = true; + return 0; + } ath11k_err(ab, "failed to alloc memory, size: 0x%x, type: %u\n", chunk->size, chunk->type); @@ -2465,7 +2487,7 @@ static void ath11k_qmi_msg_mem_request_cb(struct qmi_handle *qmi_hdl, ret); return; } - } else if (msg->mem_seg_len > 2) { + } else { ret = ath11k_qmi_alloc_target_mem_chunk(ab); if (ret) { ath11k_warn(ab, "qmi failed to alloc target memory: %d\n", diff --git a/drivers/net/wireless/ath/ath11k/qmi.h b/drivers/net/wireless/ath/ath11k/qmi.h index b0a818f0401b92..59f1452b3544c3 100644 --- 
a/drivers/net/wireless/ath/ath11k/qmi.h +++ b/drivers/net/wireless/ath/ath11k/qmi.h @@ -121,6 +121,7 @@ struct ath11k_qmi { struct target_mem_chunk target_mem[ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01]; u32 mem_seg_count; u32 target_mem_mode; + bool target_mem_delayed; u8 cal_done; struct target_info target; struct m3_mem_region m3_mem; diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c index f6a1f0352989da..678d0885fcee7e 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -80,6 +80,7 @@ ath11k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) */ init_country_param.flags = ALPHA_IS_SET; memcpy(&init_country_param.cc_info.alpha2, request->alpha2, 2); + init_country_param.cc_info.alpha2[2] = 0; ret = ath11k_wmi_send_init_country_cmd(ar, init_country_param); if (ret) @@ -584,7 +585,6 @@ ath11k_reg_build_regd(struct ath11k_base *ab, if (!tmp_regd) goto ret; - tmp_regd->n_reg_rules = num_rules; memcpy(tmp_regd->alpha2, reg_info->alpha2, REG_ALPHA2_LEN + 1); memcpy(alpha2, reg_info->alpha2, REG_ALPHA2_LEN + 1); alpha2[2] = '\0'; @@ -597,7 +597,7 @@ ath11k_reg_build_regd(struct ath11k_base *ab, /* Update reg_rules[] below. 
Firmware is expected to * send these rules in order(2G rules first and then 5G) */ - for (; i < tmp_regd->n_reg_rules; i++) { + for (; i < num_rules; i++) { if (reg_info->num_2g_reg_rules && (i < reg_info->num_2g_reg_rules)) { reg_rule = reg_info->reg_rules_2g_ptr + i; @@ -652,6 +652,8 @@ ath11k_reg_build_regd(struct ath11k_base *ab, flags); } + tmp_regd->n_reg_rules = i; + if (intersect) { default_regd = ab->default_regd[reg_info->phy_id]; diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 8eca92520837ed..04b8b002edfe0c 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -2198,37 +2198,6 @@ int ath11k_wmi_send_scan_start_cmd(struct ath11k *ar, } } - len = params->num_hint_s_ssid * sizeof(struct hint_short_ssid); - tlv = ptr; - tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_FIXED_STRUCT) | - FIELD_PREP(WMI_TLV_LEN, len); - ptr += TLV_HDR_SIZE; - if (params->num_hint_s_ssid) { - s_ssid = ptr; - for (i = 0; i < params->num_hint_s_ssid; ++i) { - s_ssid->freq_flags = params->hint_s_ssid[i].freq_flags; - s_ssid->short_ssid = params->hint_s_ssid[i].short_ssid; - s_ssid++; - } - } - ptr += len; - - len = params->num_hint_bssid * sizeof(struct hint_bssid); - tlv = ptr; - tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_FIXED_STRUCT) | - FIELD_PREP(WMI_TLV_LEN, len); - ptr += TLV_HDR_SIZE; - if (params->num_hint_bssid) { - hint_bssid = ptr; - for (i = 0; i < params->num_hint_bssid; ++i) { - hint_bssid->freq_flags = - params->hint_bssid[i].freq_flags; - ether_addr_copy(¶ms->hint_bssid[i].bssid.addr[0], - &hint_bssid->bssid.addr[0]); - hint_bssid++; - } - } - ret = ath11k_wmi_cmd_send(wmi, skb, WMI_START_SCAN_CMDID); if (ret) { diff --git a/drivers/net/wireless/ath/wil6210/Kconfig b/drivers/net/wireless/ath/wil6210/Kconfig index 6a95b199bf626f..f074e9c31aa222 100644 --- a/drivers/net/wireless/ath/wil6210/Kconfig +++ b/drivers/net/wireless/ath/wil6210/Kconfig @@ -2,6 +2,7 @@ 
config WIL6210 tristate "Wilocity 60g WiFi card wil6210 support" select WANT_DEV_COREDUMP + select CRC32 depends on CFG80211 depends on PCI default n diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index a2dbbb977d0cb7..0ee421f30aa249 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -2137,7 +2137,8 @@ brcmf_cfg80211_connect(struct wiphy *wiphy, struct net_device *ndev, BRCMF_WSEC_MAX_PSK_LEN); else if (profile->use_fwsup == BRCMF_PROFILE_FWSUP_SAE) { /* clean up user-space RSNE */ - if (brcmf_fil_iovar_data_set(ifp, "wpaie", NULL, 0)) { + err = brcmf_fil_iovar_data_set(ifp, "wpaie", NULL, 0); + if (err) { bphy_err(drvr, "failed to clean up user-space RSNE\n"); goto done; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index 39381cbde89e6c..d8db0dbcfe091f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -1936,16 +1936,18 @@ brcmf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id) fwreq = brcmf_pcie_prepare_fw_request(devinfo); if (!fwreq) { ret = -ENOMEM; - goto fail_bus; + goto fail_brcmf; } ret = brcmf_fw_get_firmwares(bus->dev, fwreq, brcmf_pcie_setup); if (ret < 0) { kfree(fwreq); - goto fail_bus; + goto fail_brcmf; } return 0; +fail_brcmf: + brcmf_free(&devinfo->pdev->dev); fail_bus: kfree(bus->msgbuf); kfree(bus); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 99987a789e7e31..59c2b2b6027da0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4541,6 +4541,7 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus) 
brcmf_sdiod_intr_unregister(bus->sdiodev); brcmf_detach(bus->sdiodev->dev); + brcmf_free(bus->sdiodev->dev); cancel_work_sync(&bus->datawork); if (bus->brcmf_wq) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c index 6d8f7bff12432c..895a907acdf0fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c @@ -224,40 +224,46 @@ static int iwl_pnvm_parse(struct iwl_trans *trans, const u8 *data, int iwl_pnvm_load(struct iwl_trans *trans, struct iwl_notif_wait_data *notif_wait) { - const struct firmware *pnvm; struct iwl_notification_wait pnvm_wait; static const u16 ntf_cmds[] = { WIDE_ID(REGULATORY_AND_NVM_GROUP, PNVM_INIT_COMPLETE_NTFY) }; - char pnvm_name[64]; - int ret; /* if the SKU_ID is empty, there's nothing to do */ if (!trans->sku_id[0] && !trans->sku_id[1] && !trans->sku_id[2]) return 0; - /* if we already have it, nothing to do either */ - if (trans->pnvm_loaded) - return 0; + /* load from disk only if we haven't done it (or tried) before */ + if (!trans->pnvm_loaded) { + const struct firmware *pnvm; + char pnvm_name[64]; + int ret; + + /* + * The prefix unfortunately includes a hyphen at the end, so + * don't add the dot here... + */ + snprintf(pnvm_name, sizeof(pnvm_name), "%spnvm", + trans->cfg->fw_name_pre); + + /* ...but replace the hyphen with the dot here. */ + if (strlen(trans->cfg->fw_name_pre) < sizeof(pnvm_name)) + pnvm_name[strlen(trans->cfg->fw_name_pre) - 1] = '.'; + + ret = firmware_request_nowarn(&pnvm, pnvm_name, trans->dev); + if (ret) { + IWL_DEBUG_FW(trans, "PNVM file %s not found %d\n", + pnvm_name, ret); + /* + * Pretend we've loaded it - at least we've tried and + * couldn't load it at all, so there's no point in + * trying again over and over. 
+ */ + trans->pnvm_loaded = true; + } else { + iwl_pnvm_parse(trans, pnvm->data, pnvm->size); - /* - * The prefix unfortunately includes a hyphen at the end, so - * don't add the dot here... - */ - snprintf(pnvm_name, sizeof(pnvm_name), "%spnvm", - trans->cfg->fw_name_pre); - - /* ...but replace the hyphen with the dot here. */ - if (strlen(trans->cfg->fw_name_pre) < sizeof(pnvm_name)) - pnvm_name[strlen(trans->cfg->fw_name_pre) - 1] = '.'; - - ret = firmware_request_nowarn(&pnvm, pnvm_name, trans->dev); - if (ret) { - IWL_DEBUG_FW(trans, "PNVM file %s not found %d\n", - pnvm_name, ret); - } else { - iwl_pnvm_parse(trans, pnvm->data, pnvm->size); - - release_firmware(pnvm); + release_firmware(pnvm); + } } iwl_init_notification_wait(notif_wait, &pnvm_wait, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 580b07a43856d1..e82e3fc963be27 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -498,7 +498,7 @@ struct iwl_cfg { #define IWL_CFG_CORES_BT_GNSS 0x5 #define IWL_SUBDEVICE_RF_ID(subdevice) ((u16)((subdevice) & 0x00F0) >> 4) -#define IWL_SUBDEVICE_NO_160(subdevice) ((u16)((subdevice) & 0x0100) >> 9) +#define IWL_SUBDEVICE_NO_160(subdevice) ((u16)((subdevice) & 0x0200) >> 9) #define IWL_SUBDEVICE_CORES(subdevice) ((u16)((subdevice) & 0x1C00) >> 10) struct iwl_dev_info { diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 51ce93d21ffe5b..fcad5cdcabfa46 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -237,13 +237,6 @@ static int iwl_dbg_tlv_alloc_region(struct iwl_trans *trans, if (le32_to_cpu(tlv->length) < sizeof(*reg)) return -EINVAL; - /* For safe using a string from FW make sure we have a - * null terminator - */ - reg->name[IWL_FW_INI_MAX_NAME - 1] = 0; - - IWL_DEBUG_FW(trans, "WRT: parsing 
region: %s\n", reg->name); - if (id >= IWL_FW_INI_MAX_REGION_ID) { IWL_ERR(trans, "WRT: Invalid region id %u\n", id); return -EINVAL; @@ -808,7 +801,7 @@ static bool is_trig_data_contained(struct iwl_ucode_tlv *new, struct iwl_fw_ini_trigger_tlv *old_trig = (void *)old->data; __le32 *new_data = new_trig->data, *old_data = old_trig->data; u32 new_dwords_num = iwl_tlv_array_len(new, new_trig, data); - u32 old_dwords_num = iwl_tlv_array_len(new, new_trig, data); + u32 old_dwords_num = iwl_tlv_array_len(old, old_trig, data); int i, j; for (i = 0; i < new_dwords_num; i++) { diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index fa3f15778fc7b8..579578534f9d9c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -355,6 +355,12 @@ #define RADIO_RSP_ADDR_POS (6) #define RADIO_RSP_RD_CMD (3) +/* LTR control (Qu only) */ +#define HPM_MAC_LTR_CSR 0xa0348c +#define HPM_MAC_LRT_ENABLE_ALL 0xf +/* also uses CSR_LTR_* for values */ +#define HPM_UMAC_LTR 0xa03480 + /* FW monitor */ #define MON_BUFF_SAMPLE_CTL (0xa03c00) #define MON_BUFF_BASE_ADDR (0xa03c1c) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index f1c5b3a9c26f75..0d1118f66f0d5d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -315,6 +315,12 @@ static const struct iwl_rx_handlers iwl_mvm_rx_handlers[] = { iwl_mvm_mu_mimo_grp_notif, RX_HANDLER_SYNC), RX_HANDLER_GRP(DATA_PATH_GROUP, STA_PM_NOTIF, iwl_mvm_sta_pm_notif, RX_HANDLER_SYNC), + RX_HANDLER_GRP(MAC_CONF_GROUP, PROBE_RESPONSE_DATA_NOTIF, + iwl_mvm_probe_resp_data_notif, + RX_HANDLER_ASYNC_LOCKED), + RX_HANDLER_GRP(MAC_CONF_GROUP, CHANNEL_SWITCH_NOA_NOTIF, + iwl_mvm_channel_switch_noa_notif, + RX_HANDLER_SYNC), }; #undef RX_HANDLER #undef RX_HANDLER_GRP diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c 
b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index fe1c538cd71828..7626117c01fa30 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -833,6 +833,7 @@ iwl_mvm_tx_tso_segment(struct sk_buff *skb, unsigned int num_subframes, next = skb_gso_segment(skb, netdev_flags); skb_shinfo(skb)->gso_size = mss; + skb_shinfo(skb)->gso_type = ipv4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6; if (WARN_ON_ONCE(IS_ERR(next))) return -EINVAL; else if (next) @@ -855,6 +856,8 @@ iwl_mvm_tx_tso_segment(struct sk_buff *skb, unsigned int num_subframes, if (tcp_payload_len > mss) { skb_shinfo(tmp)->gso_size = mss; + skb_shinfo(tmp)->gso_type = ipv4 ? SKB_GSO_TCPV4 : + SKB_GSO_TCPV6; } else { if (qos) { u8 *qc; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index 5512e3c630c319..d719e433a59bfa 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -122,6 +122,15 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, const struct fw_img *fw) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u32 ltr_val = CSR_LTR_LONG_VAL_AD_NO_SNOOP_REQ | + u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC, + CSR_LTR_LONG_VAL_AD_NO_SNOOP_SCALE) | + u32_encode_bits(250, + CSR_LTR_LONG_VAL_AD_NO_SNOOP_VAL) | + CSR_LTR_LONG_VAL_AD_SNOOP_REQ | + u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC, + CSR_LTR_LONG_VAL_AD_SNOOP_SCALE) | + u32_encode_bits(250, CSR_LTR_LONG_VAL_AD_SNOOP_VAL); struct iwl_context_info_gen3 *ctxt_info_gen3; struct iwl_prph_scratch *prph_scratch; struct iwl_prph_scratch_ctrl_cfg *prph_sc_ctrl; @@ -253,23 +262,19 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, iwl_set_bit(trans, CSR_CTXT_INFO_BOOT_CTRL, CSR_AUTO_FUNC_BOOT_ENA); - if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_AX210) { - /* - * The firmware initializes this again later (to a 
smaller - * value), but for the boot process initialize the LTR to - * ~250 usec. - */ - u32 val = CSR_LTR_LONG_VAL_AD_NO_SNOOP_REQ | - u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC, - CSR_LTR_LONG_VAL_AD_NO_SNOOP_SCALE) | - u32_encode_bits(250, - CSR_LTR_LONG_VAL_AD_NO_SNOOP_VAL) | - CSR_LTR_LONG_VAL_AD_SNOOP_REQ | - u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC, - CSR_LTR_LONG_VAL_AD_SNOOP_SCALE) | - u32_encode_bits(250, CSR_LTR_LONG_VAL_AD_SNOOP_VAL); - - iwl_write32(trans, CSR_LTR_LONG_VAL_AD, val); + /* + * To workaround hardware latency issues during the boot process, + * initialize the LTR to ~250 usec (see ltr_val above). + * The firmware initializes this again later (to a smaller value). + */ + if ((trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_AX210 || + trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_22000) && + !trans->trans_cfg->integrated) { + iwl_write32(trans, CSR_LTR_LONG_VAL_AD, ltr_val); + } else if (trans->trans_cfg->integrated && + trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_22000) { + iwl_write_prph(trans, HPM_MAC_LTR_CSR, HPM_MAC_LRT_ENABLE_ALL); + iwl_write_prph(trans, HPM_UMAC_LTR, ltr_val); } if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) @@ -341,6 +346,9 @@ int iwl_trans_pcie_ctx_info_gen3_set_pnvm(struct iwl_trans *trans, return ret; } + if (WARN_ON(prph_sc_ctrl->pnvm_cfg.pnvm_size)) + return -EBUSY; + prph_sc_ctrl->pnvm_cfg.pnvm_base_addr = cpu_to_le64(trans_pcie->pnvm_dram.physical); prph_sc_ctrl->pnvm_cfg.pnvm_size = diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 2fffbbc8462fc8..1a222469b5b4e0 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2161,7 +2161,8 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr, while (offs < dwords) { /* limit the time we spin here under lock to 1/2s */ - ktime_t timeout = ktime_add_us(ktime_get(), 500 * 
USEC_PER_MSEC); + unsigned long end = jiffies + HZ / 2; + bool resched = false; if (iwl_trans_grab_nic_access(trans, &flags)) { iwl_write32(trans, HBUS_TARG_MEM_RADDR, @@ -2172,14 +2173,15 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr, HBUS_TARG_MEM_RDAT); offs++; - /* calling ktime_get is expensive so - * do it once in 128 reads - */ - if (offs % 128 == 0 && ktime_after(ktime_get(), - timeout)) + if (time_after(jiffies, end)) { + resched = true; break; + } } iwl_trans_release_nic_access(trans, &flags); + + if (resched) + cond_resched(); } else { return -EBUSY; } diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index b849d27bd741ee..d1fc948364c79b 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -1223,13 +1223,6 @@ static netdev_tx_t ezusb_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->len < ETH_HLEN) goto drop; - ctx = ezusb_alloc_ctx(upriv, EZUSB_RID_TX, 0); - if (!ctx) - goto busy; - - memset(ctx->buf, 0, BULK_BUF_SIZE); - buf = ctx->buf->data; - tx_control = 0; err = orinoco_process_xmit_skb(skb, dev, priv, &tx_control, @@ -1237,6 +1230,13 @@ static netdev_tx_t ezusb_xmit(struct sk_buff *skb, struct net_device *dev) if (err) goto drop; + ctx = ezusb_alloc_ctx(upriv, EZUSB_RID_TX, 0); + if (!ctx) + goto drop; + + memset(ctx->buf, 0, BULK_BUF_SIZE); + buf = ctx->buf->data; + { __le16 *tx_cntl = (__le16 *)buf; *tx_cntl = cpu_to_le16(tx_control); diff --git a/drivers/net/wireless/marvell/mwifiex/join.c b/drivers/net/wireless/marvell/mwifiex/join.c index 5934f71475477e..173ccf79cbfcc8 100644 --- a/drivers/net/wireless/marvell/mwifiex/join.c +++ b/drivers/net/wireless/marvell/mwifiex/join.c @@ -877,6 +877,8 @@ mwifiex_cmd_802_11_ad_hoc_start(struct mwifiex_private *priv, memset(adhoc_start->ssid, 0, IEEE80211_MAX_SSID_LEN); + if (req_ssid->ssid_len > IEEE80211_MAX_SSID_LEN) + 
req_ssid->ssid_len = IEEE80211_MAX_SSID_LEN; memcpy(adhoc_start->ssid, req_ssid->ssid, req_ssid->ssid_len); mwifiex_dbg(adapter, INFO, "info: ADHOC_S_CMD: SSID = %s\n", diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 9ba8a8f64976b9..6283df5aaaf8b2 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -1471,6 +1471,8 @@ int mwifiex_shutdown_sw(struct mwifiex_adapter *adapter) priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); mwifiex_deauthenticate(priv, NULL); + mwifiex_init_shutdown_fw(priv, MWIFIEX_FUNC_SHUTDOWN); + mwifiex_uninit_sw(adapter); adapter->is_up = false; diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 214fc95b8a33f3..145e839fea4e56 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -72,9 +72,11 @@ mt76_free_pending_txwi(struct mt76_dev *dev) { struct mt76_txwi_cache *t; + local_bh_disable(); while ((t = __mt76_get_txwi(dev)) != NULL) dma_unmap_single(dev->dev, t->dma_addr, dev->drv->txwi_size, DMA_TO_DEVICE); + local_bh_enable(); } static int diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 4befe7f937a919..466447a5184f84 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -305,6 +305,7 @@ mt76_phy_init(struct mt76_dev *dev, struct ieee80211_hw *hw) ieee80211_hw_set(hw, SUPPORT_FAST_XMIT); ieee80211_hw_set(hw, SUPPORTS_CLONED_SKBS); ieee80211_hw_set(hw, SUPPORTS_AMSDU_IN_AMPDU); + ieee80211_hw_set(hw, SUPPORTS_REORDERING_BUFFER); if (!(dev->drv->drv_flags & MT_DRV_AMSDU_OFFLOAD)) { ieee80211_hw_set(hw, TX_AMSDU); diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/pci.c b/drivers/net/wireless/mediatek/mt76/mt7603/pci.c index a5845da3547a93..06fa28f645f28e 100644 --- 
a/drivers/net/wireless/mediatek/mt76/mt7603/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/pci.c @@ -57,7 +57,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; error: - ieee80211_free_hw(mt76_hw(dev)); + mt76_free_device(&dev->mt76); + return ret; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index 8dc645e398fda4..3d62fda067e448 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -1046,15 +1046,17 @@ int mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, if (cmd == SET_KEY) { if (cipher == MT_CIPHER_TKIP) { /* Rx/Tx MIC keys are swapped */ + memcpy(data, key, 16); memcpy(data + 16, key + 24, 8); memcpy(data + 24, key + 16, 8); + } else { + if (cipher != MT_CIPHER_BIP_CMAC_128 && wcid->cipher) + memmove(data + 16, data, 16); + if (cipher != MT_CIPHER_BIP_CMAC_128 || !wcid->cipher) + memcpy(data, key, keylen); + else if (cipher == MT_CIPHER_BIP_CMAC_128) + memcpy(data + 16, key, 16); } - if (cipher != MT_CIPHER_BIP_CMAC_128 && wcid->cipher) - memmove(data + 16, data, 16); - if (cipher != MT_CIPHER_BIP_CMAC_128 || !wcid->cipher) - memcpy(data, key, keylen); - else if (cipher == MT_CIPHER_BIP_CMAC_128) - memcpy(data + 16, key, 16); } else { if (wcid->cipher & ~BIT(cipher)) { if (cipher != MT_CIPHER_BIP_CMAC_128) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c index 6de492a4cf0258..9b191307e140e3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c @@ -240,7 +240,8 @@ int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base, return 0; error: - ieee80211_free_hw(mt76_hw(dev)); + mt76_free_device(&dev->mt76); + return ret; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c index 
2486cda3243bc2..595519c5825584 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c @@ -85,7 +85,7 @@ static int mt7663s_rx_run_queue(struct mt76_dev *dev, enum mt76_rxq_id qid, { struct mt76_queue *q = &dev->q_rx[qid]; struct mt76_sdio *sdio = &dev->sdio; - int len = 0, err, i, order; + int len = 0, err, i; struct page *page; u8 *buf; @@ -98,8 +98,7 @@ static int mt7663s_rx_run_queue(struct mt76_dev *dev, enum mt76_rxq_id qid, if (len > sdio->func->cur_blksize) len = roundup(len, sdio->func->cur_blksize); - order = get_order(len); - page = __dev_alloc_pages(GFP_KERNEL, order); + page = __dev_alloc_pages(GFP_KERNEL, get_order(len)); if (!page) return -ENOMEM; @@ -111,7 +110,7 @@ static int mt7663s_rx_run_queue(struct mt76_dev *dev, enum mt76_rxq_id qid, if (err < 0) { dev_err(dev->dev, "sdio read data failed:%d\n", err); - __free_pages(page, order); + put_page(page); return err; } @@ -128,7 +127,7 @@ static int mt7663s_rx_run_queue(struct mt76_dev *dev, enum mt76_rxq_id qid, if (q->queued + i + 1 == q->ndesc) break; } - __free_pages(page, order); + put_page(page); spin_lock_bh(&q->lock); q->head = (q->head + i) % q->ndesc; @@ -150,7 +149,7 @@ static int mt7663s_tx_pick_quota(struct mt76_sdio *sdio, enum mt76_txq_id qid, return -EBUSY; } else { if (sdio->sched.pse_data_quota < *pse_size + pse_sz || - sdio->sched.ple_data_quota < *ple_size) + sdio->sched.ple_data_quota < *ple_size + 1) return -EBUSY; *ple_size = *ple_size + 1; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index dda11c704abaa6..b87d8e136cb9af 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -194,7 +194,8 @@ mt76x0e_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; error: - ieee80211_free_hw(mt76_hw(dev)); + mt76_free_device(&dev->mt76); + return ret; } diff --git 
a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c index 4d50dad29ddffa..ecaf85b483ac3f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c @@ -90,7 +90,8 @@ mt76x2e_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; error: - ieee80211_free_hw(mt76_hw(dev)); + mt76_free_device(&dev->mt76); + return ret; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c index 1049927faf2461..8f2ad32ade180f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c @@ -233,6 +233,7 @@ static const struct file_operations fops_tx_stats = { .read = seq_read, .llseek = seq_lseek, .release = single_release, + .owner = THIS_MODULE, }; static int mt7915_read_temperature(struct seq_file *s, void *data) @@ -460,6 +461,7 @@ static const struct file_operations fops_sta_stats = { .read = seq_read, .llseek = seq_lseek, .release = single_release, + .owner = THIS_MODULE, }; void mt7915_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif, diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c index fe62b4d853e482..3ac5bbb94d2941 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c @@ -140,7 +140,7 @@ static int mt7915_pci_probe(struct pci_dev *pdev, dev = container_of(mdev, struct mt7915_dev, mt76); ret = mt7915_alloc_device(pdev, dev); if (ret) - return ret; + goto error; mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]); mdev->rev = (mt7915_l1_rr(dev, MT_HW_CHIPID) << 16) | @@ -163,7 +163,8 @@ static int mt7915_pci_probe(struct pci_dev *pdev, return 0; error: - ieee80211_free_hw(mt76_hw(dev)); + mt76_free_device(&dev->mt76); + return ret; } diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c 
b/drivers/net/wireless/mediatek/mt7601u/dma.c index 09f931d4598c2e..11071519fce815 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.c +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c @@ -152,8 +152,7 @@ mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf_rx *e) if (new_p) { /* we have one extra ref from the allocator */ - __free_pages(e->p, MT_RX_ORDER); - + put_page(e->p); e->p = new_p; } } @@ -310,7 +309,6 @@ static int mt7601u_dma_submit_tx(struct mt7601u_dev *dev, } e = &q->e[q->end]; - e->skb = skb; usb_fill_bulk_urb(e->urb, usb_dev, snd_pipe, skb->data, skb->len, mt7601u_complete_tx, q); ret = usb_submit_urb(e->urb, GFP_ATOMIC); @@ -328,6 +326,7 @@ static int mt7601u_dma_submit_tx(struct mt7601u_dev *dev, q->end = (q->end + 1) % q->entries; q->used++; + e->skb = skb; if (q->used >= q->entries) ieee80211_stop_queue(dev->hw, skb_get_queue_mapping(skb)); diff --git a/drivers/net/wireless/quantenna/qtnfmac/pcie/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pcie/pcie.c index 5337e67092ca67..0f328ce47fee3a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/pcie/pcie.c +++ b/drivers/net/wireless/quantenna/qtnfmac/pcie/pcie.c @@ -299,19 +299,19 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id) sysctl_bar = qtnf_map_bar(pdev, QTN_SYSCTL_BAR); if (IS_ERR(sysctl_bar)) { pr_err("failed to map BAR%u\n", QTN_SYSCTL_BAR); - return ret; + return PTR_ERR(sysctl_bar); } dmareg_bar = qtnf_map_bar(pdev, QTN_DMA_BAR); if (IS_ERR(dmareg_bar)) { pr_err("failed to map BAR%u\n", QTN_DMA_BAR); - return ret; + return PTR_ERR(dmareg_bar); } epmem_bar = qtnf_map_bar(pdev, QTN_SHMEM_BAR); if (IS_ERR(epmem_bar)) { pr_err("failed to map BAR%u\n", QTN_SHMEM_BAR); - return ret; + return PTR_ERR(epmem_bar); } chipid = qtnf_chip_id_get(sysctl_bar); diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index a7259dbc953da7..965bd958904593 100644 --- 
a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -78,7 +78,6 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, rtl_dbg(rtlpriv, COMP_ERR, DBG_LOUD, "Firmware callback routine entered!\n"); - complete(&rtlpriv->firmware_loading_complete); if (!firmware) { if (rtlpriv->cfg->alt_fw_name) { err = request_firmware(&firmware, @@ -91,13 +90,13 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, } pr_err("Selected firmware is not available\n"); rtlpriv->max_fw_size = 0; - return; + goto exit; } found_alt: if (firmware->size > rtlpriv->max_fw_size) { pr_err("Firmware is too big!\n"); release_firmware(firmware); - return; + goto exit; } if (!is_wow) { memcpy(rtlpriv->rtlhal.pfirmware, firmware->data, @@ -109,6 +108,9 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, rtlpriv->rtlhal.wowlan_fwsize = firmware->size; } release_firmware(firmware); + +exit: + complete(&rtlpriv->firmware_loading_complete); } void rtl_fw_cb(const struct firmware *firmware, void *context) diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index a62d41c0ccbc04..00b5589847985f 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -741,24 +741,24 @@ static int rsi_reset_card(struct rsi_hw *adapter) if (ret < 0) goto fail; } else { - if ((rsi_usb_master_reg_write(adapter, - NWP_WWD_INTERRUPT_TIMER, - NWP_WWD_INT_TIMER_CLKS, - RSI_9116_REG_SIZE)) < 0) { + ret = rsi_usb_master_reg_write(adapter, + NWP_WWD_INTERRUPT_TIMER, + NWP_WWD_INT_TIMER_CLKS, + RSI_9116_REG_SIZE); + if (ret < 0) goto fail; - } - if ((rsi_usb_master_reg_write(adapter, - NWP_WWD_SYSTEM_RESET_TIMER, - NWP_WWD_SYS_RESET_TIMER_CLKS, - RSI_9116_REG_SIZE)) < 0) { + ret = rsi_usb_master_reg_write(adapter, + NWP_WWD_SYSTEM_RESET_TIMER, + NWP_WWD_SYS_RESET_TIMER_CLKS, + RSI_9116_REG_SIZE); + if (ret < 0) goto fail; - } - if 
((rsi_usb_master_reg_write(adapter, - NWP_WWD_MODE_AND_RSTART, - NWP_WWD_TIMER_DISABLE, - RSI_9116_REG_SIZE)) < 0) { + ret = rsi_usb_master_reg_write(adapter, + NWP_WWD_MODE_AND_RSTART, + NWP_WWD_TIMER_DISABLE, + RSI_9116_REG_SIZE); + if (ret < 0) goto fail; - } } rsi_dbg(INFO_ZONE, "Reset card done\n"); diff --git a/drivers/net/wireless/st/cw1200/main.c b/drivers/net/wireless/st/cw1200/main.c index f7fe56affbcd21..326b1cc1d2bcb6 100644 --- a/drivers/net/wireless/st/cw1200/main.c +++ b/drivers/net/wireless/st/cw1200/main.c @@ -381,6 +381,7 @@ static struct ieee80211_hw *cw1200_init_common(const u8 *macaddr, CW1200_LINK_ID_MAX, cw1200_skb_dtor, priv)) { + destroy_workqueue(priv->workqueue); ieee80211_free_hw(hw); return NULL; } @@ -392,6 +393,7 @@ static struct ieee80211_hw *cw1200_init_common(const u8 *macaddr, for (; i > 0; i--) cw1200_queue_deinit(&priv->tx_queue[i - 1]); cw1200_queue_stats_deinit(&priv->tx_queue_stats); + destroy_workqueue(priv->workqueue); ieee80211_free_hw(hw); return NULL; } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index f1c1624cec8f5d..6f10e0998f1ced 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -557,12 +557,14 @@ static int xen_register_credit_watch(struct xenbus_device *dev, return -ENOMEM; snprintf(node, maxlen, "%s/rate", dev->nodename); vif->credit_watch.node = node; + vif->credit_watch.will_handle = NULL; vif->credit_watch.callback = xen_net_rate_changed; err = register_xenbus_watch(&vif->credit_watch); if (err) { pr_err("Failed to set watcher %s\n", vif->credit_watch.node); kfree(node); vif->credit_watch.node = NULL; + vif->credit_watch.will_handle = NULL; vif->credit_watch.callback = NULL; } return err; @@ -609,6 +611,7 @@ static int xen_register_mcast_ctrl_watch(struct xenbus_device *dev, snprintf(node, maxlen, "%s/request-multicast-control", dev->otherend); vif->mcast_ctrl_watch.node = node; + vif->mcast_ctrl_watch.will_handle = NULL; 
vif->mcast_ctrl_watch.callback = xen_mcast_ctrl_changed; err = register_xenbus_watch(&vif->mcast_ctrl_watch); if (err) { @@ -616,6 +619,7 @@ static int xen_register_mcast_ctrl_watch(struct xenbus_device *dev, vif->mcast_ctrl_watch.node); kfree(node); vif->mcast_ctrl_watch.node = NULL; + vif->mcast_ctrl_watch.will_handle = NULL; vif->mcast_ctrl_watch.callback = NULL; } return err; @@ -820,7 +824,7 @@ static void connect(struct backend_info *be) xenvif_carrier_on(be->vif); unregister_hotplug_status_watch(be); - err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, + err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL, hotplug_status_changed, "%s/%s", dev->nodename, "hotplug-status"); if (!err) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index ec930ee2c847e7..64df50827642b3 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -293,8 +293,10 @@ static int s3fwrn5_fw_request_firmware(struct s3fwrn5_fw_info *fw_info) if (ret < 0) return ret; - if (fw->fw->size < S3FWRN5_FW_IMAGE_HEADER_SIZE) + if (fw->fw->size < S3FWRN5_FW_IMAGE_HEADER_SIZE) { + release_firmware(fw->fw); return -EINVAL; + } memcpy(fw->date, fw->fw->data + 0x00, 12); fw->date[12] = '\0'; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 47a4828b8b310d..9251441fd8a352 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -980,6 +980,15 @@ static int __blk_label_update(struct nd_region *nd_region, } } + /* release slots associated with any invalidated UUIDs */ + mutex_lock(&nd_mapping->lock); + list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) + if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags)) { + reap_victim(nd_mapping, label_ent); + list_move(&label_ent->list, &list); + } + mutex_unlock(&nd_mapping->lock); + /* * Find the resource associated with the first label in the set * per the v1.2 namespace specification. 
@@ -999,8 +1008,10 @@ static int __blk_label_update(struct nd_region *nd_region, if (is_old_resource(res, old_res_list, old_num_resources)) continue; /* carry-over */ slot = nd_label_alloc_slot(ndd); - if (slot == UINT_MAX) + if (slot == UINT_MAX) { + rc = -ENXIO; goto abort; + } dev_dbg(ndd->dev, "allocated: %d\n", slot); nd_label = to_label(ndd, slot); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9a270e49df179f..4ec5f05dabe1d5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1489,8 +1489,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) } length = (io.nblocks + 1) << ns->lba_shift; - meta_len = (io.nblocks + 1) * ns->ms; - metadata = nvme_to_user_ptr(io.metadata); + + if ((io.control & NVME_RW_PRINFO_PRACT) && + ns->ms == sizeof(struct t10_pi_tuple)) { + /* + * Protection information is stripped/inserted by the + * controller. + */ + if (nvme_to_user_ptr(io.metadata)) + return -EINVAL; + meta_len = 0; + metadata = NULL; + } else { + meta_len = (io.nblocks + 1) * ns->ms; + metadata = nvme_to_user_ptr(io.metadata); + } if (ns->features & NVME_NS_EXT_LBAS) { length += meta_len; @@ -2802,6 +2815,11 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = { NULL, }; +static inline bool nvme_discovery_ctrl(struct nvme_ctrl *ctrl) +{ + return ctrl->opts && ctrl->opts->discovery_nqn; +} + static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { @@ -2821,7 +2839,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, } if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || - (ctrl->opts && ctrl->opts->discovery_nqn)) + nvme_discovery_ctrl(ctrl)) continue; dev_err(ctrl->device, @@ -3090,7 +3108,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) goto out_free; } - if (!ctrl->opts->discovery_nqn && !ctrl->kas) { + if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) { dev_err(ctrl->device, "keep-alive support is mandatory 
for fabrics\n"); ret = -EINVAL; @@ -3130,7 +3148,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; - if (!ctrl->identified) { + if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) { ret = nvme_hwmon_init(ctrl); if (ret < 0) return ret; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index f4c246462658fa..5ead217ac2bc80 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -166,6 +166,7 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; + struct work_struct ioerr_work; struct delayed_work connect_work; struct kref ref; @@ -1888,6 +1889,15 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, } } +static void +nvme_fc_ctrl_ioerr_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, ioerr_work); + + nvme_fc_error_recovery(ctrl, "transport detected io error"); +} + static void nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) { @@ -2046,7 +2056,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) check_error: if (terminate_assoc) - nvme_fc_error_recovery(ctrl, "transport detected io error"); + queue_work(nvme_reset_wq, &ctrl->ioerr_work); } static int @@ -3233,6 +3243,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + cancel_work_sync(&ctrl->ioerr_work); cancel_delayed_work_sync(&ctrl->connect_work); /* * kill the association on the link side. 
this will block @@ -3449,6 +3460,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work); spin_lock_init(&ctrl->lock); /* io queue count */ @@ -3540,6 +3552,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, fail_ctrl: nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); + cancel_work_sync(&ctrl->ioerr_work); cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 74896be40c1769..292e535a385d40 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -221,7 +221,7 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, } for (ns = nvme_next_ns(head, old); - ns != old; + ns && ns != old; ns = nvme_next_ns(head, ns)) { if (nvme_path_is_disabled(ns)) continue; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3be352403839a0..a3486c1c27f0c2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -542,50 +543,71 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) return true; } -static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) +static void nvme_free_prps(struct nvme_dev *dev, struct request *req) { - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1; - dma_addr_t dma_addr = iod->first_dma, next_dma_addr; + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + dma_addr_t dma_addr = iod->first_dma; int i; - if (iod->dma_len) { - dma_unmap_page(dev->dev, dma_addr, iod->dma_len, - rq_dma_dir(req)); - return; + for (i = 0; i < iod->npages; i++) { + __le64 *prp_list = 
nvme_pci_iod_list(req)[i]; + dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); + + dma_pool_free(dev->prp_page_pool, prp_list, dma_addr); + dma_addr = next_dma_addr; } - WARN_ON_ONCE(!iod->nents); +} - if (is_pci_p2pdma_page(sg_page(iod->sg))) - pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents, - rq_dma_dir(req)); - else - dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); +static void nvme_free_sgls(struct nvme_dev *dev, struct request *req) +{ + const int last_sg = SGES_PER_PAGE - 1; + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + dma_addr_t dma_addr = iod->first_dma; + int i; + for (i = 0; i < iod->npages; i++) { + struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i]; + dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr); - if (iod->npages == 0) - dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], - dma_addr); + dma_pool_free(dev->prp_page_pool, sg_list, dma_addr); + dma_addr = next_dma_addr; + } - for (i = 0; i < iod->npages; i++) { - void *addr = nvme_pci_iod_list(req)[i]; +} - if (iod->use_sgl) { - struct nvme_sgl_desc *sg_list = addr; +static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - next_dma_addr = - le64_to_cpu((sg_list[SGES_PER_PAGE - 1]).addr); - } else { - __le64 *prp_list = addr; + if (is_pci_p2pdma_page(sg_page(iod->sg))) + pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents, + rq_dma_dir(req)); + else + dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); +} - next_dma_addr = le64_to_cpu(prp_list[last_prp]); - } +static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - dma_pool_free(dev->prp_page_pool, addr, dma_addr); - dma_addr = next_dma_addr; + if (iod->dma_len) { + dma_unmap_page(dev->dev, iod->first_dma, iod->dma_len, + rq_dma_dir(req)); + return; } + WARN_ON_ONCE(!iod->nents); + + nvme_unmap_sg(dev, req); + if (iod->npages == 0) + 
dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], + iod->first_dma); + else if (iod->use_sgl) + nvme_free_sgls(dev, req); + else + nvme_free_prps(dev, req); mempool_free(iod->sg, dev->iod_mempool); } @@ -661,7 +683,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, __le64 *old_prp_list = prp_list; prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); if (!prp_list) - return BLK_STS_RESOURCE; + goto free_prps; list[iod->npages++] = prp_list; prp_list[0] = old_prp_list[i - 1]; old_prp_list[i - 1] = cpu_to_le64(prp_dma); @@ -681,14 +703,14 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, dma_addr = sg_dma_address(sg); dma_len = sg_dma_len(sg); } - done: cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma); - return BLK_STS_OK; - - bad_sgl: +free_prps: + nvme_free_prps(dev, req); + return BLK_STS_RESOURCE; +bad_sgl: WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents), "Invalid SGL for payload:%d nents:%d\n", blk_rq_payload_bytes(req), iod->nents); @@ -760,7 +782,7 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); if (!sg_list) - return BLK_STS_RESOURCE; + goto free_sgls; i = 0; nvme_pci_iod_list(req)[iod->npages++] = sg_list; @@ -773,6 +795,9 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, } while (--entries > 0); return BLK_STS_OK; +free_sgls: + nvme_free_sgls(dev, req); + return BLK_STS_RESOURCE; } static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, @@ -841,7 +866,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); iod->nents = blk_rq_map_sg(req->q, req, iod->sg); if (!iod->nents) - goto out; + goto out_free_sg; if (is_pci_p2pdma_page(sg_page(iod->sg))) nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg, @@ -850,16 +875,21 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request 
*req, nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN); if (!nr_mapped) - goto out; + goto out_free_sg; iod->use_sgl = nvme_pci_use_sgls(dev, req); if (iod->use_sgl) ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); else ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); -out: if (ret != BLK_STS_OK) - nvme_unmap_data(dev, req); + goto out_unmap_sg; + return BLK_STS_OK; + +out_unmap_sg: + nvme_unmap_sg(dev, req); +out_free_sg: + mempool_free(iod->sg, dev->iod_mempool); return ret; } @@ -967,6 +997,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { struct nvme_completion *cqe = &nvmeq->cqes[idx]; + __u16 command_id = READ_ONCE(cqe->command_id); struct request *req; /* @@ -975,17 +1006,17 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. 
*/ - if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) { + if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) { nvme_complete_async_event(&nvmeq->dev->ctrl, cqe->status, &cqe->result); return; } - req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); + req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id); if (unlikely(!req)) { dev_warn(nvmeq->dev->ctrl.device, "invalid id %d completed on queue %d\n", - cqe->command_id, le16_to_cpu(cqe->sq_id)); + command_id, le16_to_cpu(cqe->sq_id)); return; } @@ -1795,6 +1826,9 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (dev->cmb_size) return; + if (NVME_CAP_CMBS(dev->ctrl.cap)) + writel(NVME_CMBMSC_CRE, dev->bar + NVME_REG_CMBMSC); + dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); if (!dev->cmbsz) return; @@ -1808,6 +1842,16 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (offset > bar_size) return; + /* + * Tell the controller about the host side address mapping the CMB, + * and enable CMB decoding for the NVMe 1.4+ scheme: + */ + if (NVME_CAP_CMBS(dev->ctrl.cap)) { + hi_lo_writeq(NVME_CMBMSC_CRE | NVME_CMBMSC_CMSE | + (pci_bus_address(pdev, bar) + offset), + dev->bar + NVME_REG_CMBMSC); + } + /* * Controllers may support a CMB size larger than their BAR, * for example, due to being behind a bridge. 
Reduce the CMB to @@ -3201,7 +3245,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 65e3d0ef36e1a3..493ed7ba86ed21 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -97,6 +97,7 @@ struct nvme_rdma_queue { struct completion cm_done; bool pi_support; int cq_size; + struct mutex queue_lock; }; struct nvme_rdma_ctrl { @@ -579,6 +580,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, int ret; queue = &ctrl->queues[idx]; + mutex_init(&queue->queue_lock); queue->ctrl = ctrl; if (idx && ctrl->ctrl.max_integrity_segments) queue->pi_support = true; @@ -598,7 +600,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, if (IS_ERR(queue->cm_id)) { dev_info(ctrl->ctrl.device, "failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id)); - return PTR_ERR(queue->cm_id); + ret = PTR_ERR(queue->cm_id); + goto out_destroy_mutex; } if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) @@ -628,6 +631,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, out_destroy_cm_id: rdma_destroy_id(queue->cm_id); nvme_rdma_destroy_queue_ib(queue); +out_destroy_mutex: + mutex_destroy(&queue->queue_lock); return ret; } @@ -639,9 +644,10 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) { - if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) - return; - __nvme_rdma_stop_queue(queue); + mutex_lock(&queue->queue_lock); + if 
(test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) + __nvme_rdma_stop_queue(queue); + mutex_unlock(&queue->queue_lock); } static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) @@ -651,6 +657,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); + mutex_destroy(&queue->queue_lock); } static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c0c33320fe659f..6487b7897d1fb4 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -76,6 +76,7 @@ struct nvme_tcp_queue { struct work_struct io_work; int io_cpu; + struct mutex queue_lock; struct mutex send_mutex; struct llist_head req_list; struct list_head send_list; @@ -201,7 +202,7 @@ static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req) static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req) { - return min_t(size_t, req->iter.bvec->bv_len - req->iter.iov_offset, + return min_t(size_t, iov_iter_single_seg_count(&req->iter), req->pdu_len - req->pdu_sent); } @@ -262,6 +263,16 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, } } +static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) +{ + int ret; + + /* drain the send queue as much as we can... */ + do { + ret = nvme_tcp_try_send(queue); + } while (ret > 0); +} + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, bool sync, bool last) { @@ -276,10 +287,10 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, * directly, otherwise queue io_work. Also, only do that if we * are on the same cpu, so we don't introduce contention. 
*/ - if (queue->io_cpu == smp_processor_id() && + if (queue->io_cpu == __smp_processor_id() && sync && empty && mutex_trylock(&queue->send_mutex)) { queue->more_requests = !last; - nvme_tcp_try_send(queue); + nvme_tcp_send_all(queue); queue->more_requests = false; mutex_unlock(&queue->send_mutex); } else if (last) { @@ -1209,6 +1220,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) sock_release(queue->sock); kfree(queue->pdu); + mutex_destroy(&queue->queue_lock); } static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) @@ -1370,6 +1382,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, struct nvme_tcp_queue *queue = &ctrl->queues[qid]; int ret, rcv_pdu_size; + mutex_init(&queue->queue_lock); queue->ctrl = ctrl; init_llist_head(&queue->req_list); INIT_LIST_HEAD(&queue->send_list); @@ -1388,7 +1401,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, if (ret) { dev_err(nctrl->device, "failed to create socket: %d\n", ret); - return ret; + goto err_destroy_mutex; } /* Single syn retry */ @@ -1497,6 +1510,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, err_sock: sock_release(queue->sock); queue->sock = NULL; +err_destroy_mutex: + mutex_destroy(&queue->queue_lock); return ret; } @@ -1524,9 +1539,10 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; - if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) - return; - __nvme_tcp_stop_queue(queue); + mutex_lock(&queue->queue_lock); + if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) + __nvme_tcp_stop_queue(queue); + mutex_unlock(&queue->queue_lock); } static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index dca34489a1dc9e..92ca23bc8dbfc8 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -487,8 +487,10 @@ 
static void nvmet_execute_identify_ns(struct nvmet_req *req) /* return an all zeroed buffer if we can't find an active namespace */ ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid); - if (!ns) + if (!ns) { + status = NVME_SC_INVALID_NS; goto done; + } nvmet_ns_revalidate(ns); @@ -541,7 +543,9 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) id->nsattr |= (1 << 0); nvmet_put_namespace(ns); done: - status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); + if (!status) + status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); + kfree(id); out: nvmet_req_complete(req, status); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ae6620489457d6..06b6b742bb213e 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -414,7 +414,8 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) goto out_free_rsp; - r->req.p2p_client = &ndev->device->dev; + if (!ib_uses_virt_dma(ndev->device)) + r->req.p2p_client = &ndev->device->dev; r->send_sge.length = sizeof(*r->req.cqe); r->send_sge.lkey = ndev->pd->local_dma_lkey; @@ -1219,6 +1220,14 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) } ndev->inline_data_size = nport->inline_data_size; ndev->inline_page_count = inline_page_count; + + if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER)) { + pr_warn("T10-PI is not supported by device %s. 
Disabling it\n", + cm_id->device->name); + nport->pi_enable = false; + } + ndev->device = cm_id->device; kref_init(&ndev->ref); @@ -1640,6 +1649,16 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) spin_lock_irqsave(&queue->state_lock, flags); switch (queue->state) { case NVMET_RDMA_Q_CONNECTING: + while (!list_empty(&queue->rsp_wait_list)) { + struct nvmet_rdma_rsp *rsp; + + rsp = list_first_entry(&queue->rsp_wait_list, + struct nvmet_rdma_rsp, + wait_list); + list_del(&rsp->wait_list); + nvmet_rdma_put_rsp(rsp); + } + fallthrough; case NVMET_RDMA_Q_LIVE: queue->state = NVMET_RDMA_Q_DISCONNECTING; disconnect = true; @@ -1844,14 +1863,6 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) goto out_destroy_id; } - if (port->nport->pi_enable && - !(cm_id->device->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER)) { - pr_err("T10-PI is not supported for %pISpcs\n", addr); - ret = -EINVAL; - goto out_destroy_id; - } - port->cm_id = cm_id; return 0; diff --git a/drivers/of/device.c b/drivers/of/device.c index aedfaaafd3e7ed..1122daa8e27364 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -162,9 +162,11 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, mask = DMA_BIT_MASK(ilog2(end) + 1); dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; - /* ...but only set bus limit if we found valid dma-ranges earlier */ - if (!ret) + /* ...but only set bus limit and range map if we found valid dma-ranges earlier */ + if (!ret) { dev->bus_dma_limit = end; + dev->dma_range_map = map; + } coherent = of_dma_is_coherent(np); dev_dbg(dev, "device is%sdma coherent\n", @@ -172,6 +174,9 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, iommu = of_iommu_configure(dev, np, id); if (PTR_ERR(iommu) == -EPROBE_DEFER) { + /* Don't touch range map if it wasn't set from a valid dma-ranges */ + if (!ret) + dev->dma_range_map = NULL; kfree(map); return -EPROBE_DEFER; } @@ -181,7 +186,6 @@ int 
of_dma_configure_id(struct device *dev, struct device_node *np, arch_setup_dma_ops(dev, dma_start, size, iommu, coherent); - dev->dma_range_map = map; return 0; } EXPORT_SYMBOL_GPL(of_dma_configure_id); diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0e0a5269dc82f8..903b465c8568b7 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1102,7 +1102,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) if (IS_ERR(opp_table->clk)) { ret = PTR_ERR(opp_table->clk); if (ret == -EPROBE_DEFER) - goto err; + goto remove_opp_dev; dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret); } @@ -1111,7 +1111,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) ret = dev_pm_opp_of_find_icc_paths(dev, opp_table); if (ret) { if (ret == -EPROBE_DEFER) - goto err; + goto put_clk; dev_warn(dev, "%s: Error finding interconnect paths: %d\n", __func__, ret); @@ -1125,6 +1125,11 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) list_add(&opp_table->node, &opp_tables); return opp_table; +put_clk: + if (!IS_ERR(opp_table->clk)) + clk_put(opp_table->clk); +remove_opp_dev: + _remove_opp_dev(opp_dev, opp_table); err: kfree(opp_table); return ERR_PTR(ret); diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index bea86899bd5df1..9c3d2982248d36 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -893,6 +893,7 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie) burst = 0x2; /* 512 bytes */ /* Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN */ + tmp = readl(base + PCIE_MISC_MISC_CTRL); u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_SCB_ACCESS_EN_MASK); u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_CFG_READ_UR_MODE_MASK); u32p_replace_bits(&tmp, burst, PCIE_MISC_MISC_CTRL_MAX_BURST_SIZE_MASK); diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c index 
905e938082432b..cc5b7823edeb72 100644 --- a/drivers/pci/controller/pcie-iproc.c +++ b/drivers/pci/controller/pcie-iproc.c @@ -192,8 +192,15 @@ static const struct iproc_pcie_ib_map paxb_v2_ib_map[] = { .imap_window_offset = 0x4, }, { - /* IARR1/IMAP1 (currently unused) */ - .type = IPROC_PCIE_IB_MAP_INVALID, + /* IARR1/IMAP1 */ + .type = IPROC_PCIE_IB_MAP_MEM, + .size_unit = SZ_1M, + .region_sizes = { 8 }, + .nr_sizes = 1, + .nr_windows = 8, + .imap_addr_offset = 0x4, + .imap_window_offset = 0x8, + }, { /* IARR2/IMAP2 */ @@ -307,7 +314,7 @@ enum iproc_pcie_reg { }; /* iProc PCIe PAXB BCMA registers */ -static const u16 iproc_pcie_reg_paxb_bcma[] = { +static const u16 iproc_pcie_reg_paxb_bcma[IPROC_PCIE_MAX_NUM_REG] = { [IPROC_PCIE_CLK_CTRL] = 0x000, [IPROC_PCIE_CFG_IND_ADDR] = 0x120, [IPROC_PCIE_CFG_IND_DATA] = 0x124, @@ -318,7 +325,7 @@ static const u16 iproc_pcie_reg_paxb_bcma[] = { }; /* iProc PCIe PAXB registers */ -static const u16 iproc_pcie_reg_paxb[] = { +static const u16 iproc_pcie_reg_paxb[IPROC_PCIE_MAX_NUM_REG] = { [IPROC_PCIE_CLK_CTRL] = 0x000, [IPROC_PCIE_CFG_IND_ADDR] = 0x120, [IPROC_PCIE_CFG_IND_DATA] = 0x124, @@ -334,7 +341,7 @@ static const u16 iproc_pcie_reg_paxb[] = { }; /* iProc PCIe PAXB v2 registers */ -static const u16 iproc_pcie_reg_paxb_v2[] = { +static const u16 iproc_pcie_reg_paxb_v2[IPROC_PCIE_MAX_NUM_REG] = { [IPROC_PCIE_CLK_CTRL] = 0x000, [IPROC_PCIE_CFG_IND_ADDR] = 0x120, [IPROC_PCIE_CFG_IND_DATA] = 0x124, @@ -351,6 +358,8 @@ static const u16 iproc_pcie_reg_paxb_v2[] = { [IPROC_PCIE_OMAP3] = 0xdf8, [IPROC_PCIE_IARR0] = 0xd00, [IPROC_PCIE_IMAP0] = 0xc00, + [IPROC_PCIE_IARR1] = 0xd08, + [IPROC_PCIE_IMAP1] = 0xd70, [IPROC_PCIE_IARR2] = 0xd10, [IPROC_PCIE_IMAP2] = 0xcc0, [IPROC_PCIE_IARR3] = 0xe00, @@ -363,7 +372,7 @@ static const u16 iproc_pcie_reg_paxb_v2[] = { }; /* iProc PCIe PAXC v1 registers */ -static const u16 iproc_pcie_reg_paxc[] = { +static const u16 iproc_pcie_reg_paxc[IPROC_PCIE_MAX_NUM_REG] = { [IPROC_PCIE_CLK_CTRL] = 
0x000, [IPROC_PCIE_CFG_IND_ADDR] = 0x1f0, [IPROC_PCIE_CFG_IND_DATA] = 0x1f4, @@ -372,7 +381,7 @@ static const u16 iproc_pcie_reg_paxc[] = { }; /* iProc PCIe PAXC v2 registers */ -static const u16 iproc_pcie_reg_paxc_v2[] = { +static const u16 iproc_pcie_reg_paxc_v2[IPROC_PCIE_MAX_NUM_REG] = { [IPROC_PCIE_MSI_GIC_MODE] = 0x050, [IPROC_PCIE_MSI_BASE_ADDR] = 0x074, [IPROC_PCIE_MSI_WINDOW_SIZE] = 0x078, diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index bf03648c207231..745a4e0c4994fc 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1060,7 +1060,7 @@ static int acpi_pci_propagate_wakeup(struct pci_bus *bus, bool enable) { while (bus->parent) { if (acpi_pm_device_can_wakeup(&bus->self->dev)) - return acpi_pm_set_bridge_wakeup(&bus->self->dev, enable); + return acpi_pm_set_device_wakeup(&bus->self->dev, enable); bus = bus->parent; } @@ -1068,7 +1068,7 @@ static int acpi_pci_propagate_wakeup(struct pci_bus *bus, bool enable) /* We have reached the root bus. */ if (bus->bridge) { if (acpi_pm_device_can_wakeup(bus->bridge)) - return acpi_pm_set_bridge_wakeup(bus->bridge, enable); + return acpi_pm_set_device_wakeup(bus->bridge, enable); } return 0; } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e578d34095e91a..6427cbd0a5be23 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6202,19 +6202,21 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev, while (*p) { count = 0; if (sscanf(p, "%d%n", &align_order, &count) == 1 && - p[count] == '@') { + p[count] == '@') { p += count + 1; + if (align_order > 63) { + pr_err("PCI: Invalid requested alignment (order %d)\n", + align_order); + align_order = PAGE_SHIFT; + } } else { - align_order = -1; + align_order = PAGE_SHIFT; } ret = pci_dev_str_match(dev, p, &p); if (ret == 1) { *resize = true; - if (align_order == -1) - align = PAGE_SIZE; - else - align = 1 << align_order; + align = 1ULL << align_order; break; } else if (ret < 0) { pr_err("PCI: Can't parse 
resource_alignment parameter: %s\n", diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f70692ac79c565..fb1dc11e7cc529 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5567,17 +5567,26 @@ static void pci_fixup_no_d0_pme(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ASMEDIA, 0x2142, pci_fixup_no_d0_pme); /* - * Device [12d8:0x400e] and [12d8:0x400f] + * Device 12d8:0x400e [OHCI] and 12d8:0x400f [EHCI] + * * These devices advertise PME# support in all power states but don't * reliably assert it. + * + * These devices also advertise MSI, but documentation (PI7C9X440SL.pdf) + * says "The MSI Function is not implemented on this device" in chapters + * 7.3.27, 7.3.29-7.3.31. */ -static void pci_fixup_no_pme(struct pci_dev *dev) +static void pci_fixup_no_msi_no_pme(struct pci_dev *dev) { +#ifdef CONFIG_PCI_MSI + pci_info(dev, "MSI is not implemented on this device, disabling it\n"); + dev->no_msi = 1; +#endif pci_info(dev, "PME# is unreliable, disabling it\n"); dev->pme_support = 0; } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_PERICOM, 0x400e, pci_fixup_no_pme); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_PERICOM, 0x400f, pci_fixup_no_pme); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_PERICOM, 0x400e, pci_fixup_no_msi_no_pme); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_PERICOM, 0x400f, pci_fixup_no_msi_no_pme); static void apex_pci_fixup_class(struct pci_dev *pdev) { diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 3861505741e6d5..ed2077e7470aef 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -272,6 +272,9 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, goto err; } + INIT_LIST_HEAD(&slot->list); + list_add(&slot->list, &parent->slots); + err = kobject_init_and_add(&slot->kobj, &pci_slot_ktype, NULL, "%s", slot_name); if (err) { @@ -279,9 +282,6 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, goto err; } - INIT_LIST_HEAD(&slot->list); - list_add(&slot->list, &parent->slots); - 
down_read(&pci_bus_sem); list_for_each_entry(dev, &parent->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot_nr) diff --git a/drivers/phy/mediatek/Kconfig b/drivers/phy/mediatek/Kconfig index c8126bde9d7cce..43150608d8b627 100644 --- a/drivers/phy/mediatek/Kconfig +++ b/drivers/phy/mediatek/Kconfig @@ -38,7 +38,9 @@ config PHY_MTK_XSPHY config PHY_MTK_HDMI tristate "MediaTek HDMI-PHY Driver" - depends on ARCH_MEDIATEK && OF + depends on ARCH_MEDIATEK || COMPILE_TEST + depends on COMMON_CLK + depends on OF select GENERIC_PHY help Support HDMI PHY for Mediatek SoCs. diff --git a/drivers/phy/mediatek/phy-mtk-hdmi.c b/drivers/phy/mediatek/phy-mtk-hdmi.c index 47c029d4b270bb..206cc346872236 100644 --- a/drivers/phy/mediatek/phy-mtk-hdmi.c +++ b/drivers/phy/mediatek/phy-mtk-hdmi.c @@ -84,8 +84,9 @@ mtk_hdmi_phy_dev_get_ops(const struct mtk_hdmi_phy *hdmi_phy) hdmi_phy->conf->hdmi_phy_disable_tmds) return &mtk_hdmi_phy_dev_ops; - dev_err(hdmi_phy->dev, "Failed to get dev ops of phy\n"); - return NULL; + if (hdmi_phy) + dev_err(hdmi_phy->dev, "Failed to get dev ops of phy\n"); + return NULL; } static void mtk_hdmi_phy_clk_get_data(struct mtk_hdmi_phy *hdmi_phy, diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index 442522ba487f07..4728e2bff6620b 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -662,35 +662,42 @@ static int cpcap_usb_phy_probe(struct platform_device *pdev) generic_phy = devm_phy_create(ddata->dev, NULL, &ops); if (IS_ERR(generic_phy)) { error = PTR_ERR(generic_phy); - return PTR_ERR(generic_phy); + goto out_reg_disable; } phy_set_drvdata(generic_phy, ddata); phy_provider = devm_of_phy_provider_register(ddata->dev, of_phy_simple_xlate); - if (IS_ERR(phy_provider)) - return PTR_ERR(phy_provider); + if (IS_ERR(phy_provider)) { + error = PTR_ERR(phy_provider); + goto out_reg_disable; + } error = cpcap_usb_init_optional_pins(ddata); if (error) - return error; + goto 
out_reg_disable; cpcap_usb_init_optional_gpios(ddata); error = cpcap_usb_init_iio(ddata); if (error) - return error; + goto out_reg_disable; error = cpcap_usb_init_interrupts(pdev, ddata); if (error) - return error; + goto out_reg_disable; usb_add_phy_dev(&ddata->phy); atomic_set(&ddata->active, 1); schedule_delayed_work(&ddata->detect_work, msecs_to_jiffies(1)); return 0; + +out_reg_disable: + regulator_disable(ddata->vusb); + + return error; } static int cpcap_usb_phy_remove(struct platform_device *pdev) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index e34e4475027cae..2cb949f931b69a 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -656,8 +656,10 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) */ pm_runtime_enable(dev); phy_usb2_ops = of_device_get_match_data(dev); - if (!phy_usb2_ops) - return -EINVAL; + if (!phy_usb2_ops) { + ret = -EINVAL; + goto error; + } mutex_init(&channel->lock); for (i = 0; i < NUM_OF_PHYS; i++) { diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index ad88d74c18842c..181a1be5f49177 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -688,7 +688,7 @@ static int tegra_xusb_setup_usb_role_switch(struct tegra_xusb_port *port) * reference to retrieve usb-phy details. 
*/ port->usb_phy.dev = &lane->pad->lanes[port->index]->dev; - port->usb_phy.dev->driver = port->padctl->dev->driver; + port->usb_phy.dev->driver = port->dev.driver; port->usb_phy.otg->usb_phy = &port->usb_phy; port->usb_phy.otg->set_peripheral = tegra_xusb_set_peripheral; port->usb_phy.otg->set_host = tegra_xusb_set_host; diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 34803a6c766432..5c1a109842a760 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -347,7 +347,7 @@ FUNC_GROUP_DECL(RMII4, F24, E23, E24, E25, C25, C24, B26, B25, B24); #define D22 40 SIG_EXPR_LIST_DECL_SESG(D22, SD1CLK, SD1, SIG_DESC_SET(SCU414, 8)); -SIG_EXPR_LIST_DECL_SEMG(D22, PWM8, PWM8G0, PWM8, SIG_DESC_SET(SCU414, 8)); +SIG_EXPR_LIST_DECL_SEMG(D22, PWM8, PWM8G0, PWM8, SIG_DESC_SET(SCU4B4, 8)); PIN_DECL_2(D22, GPIOF0, SD1CLK, PWM8); GROUP_DECL(PWM8G0, D22); diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 3663d87f51a013..9fc4433fece4f0 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1602,9 +1602,11 @@ static int pinctrl_pins_show(struct seq_file *s, void *what) struct pinctrl_dev *pctldev = s->private; const struct pinctrl_ops *ops = pctldev->desc->pctlops; unsigned i, pin; +#ifdef CONFIG_GPIOLIB struct pinctrl_gpio_range *range; unsigned int gpio_num; struct gpio_chip *chip; +#endif seq_printf(s, "registered pins: %d\n", pctldev->desc->npins); diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 7e950f5d62d0fd..7815426e7aeaaf 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -926,6 +926,10 @@ int mtk_pinconf_adv_pull_set(struct mtk_pinctrl *hw, err = hw->soc->bias_set(hw, desc, pullup); if (err) return err; + } else if (hw->soc->bias_set_combo) { + err = hw->soc->bias_set_combo(hw, desc, pullup, arg); + if 
(err) + return err; } else { return -ENOTSUPP; } diff --git a/drivers/pinctrl/pinctrl-falcon.c b/drivers/pinctrl/pinctrl-falcon.c index 62c02b969327f8..7521a924dffb0a 100644 --- a/drivers/pinctrl/pinctrl-falcon.c +++ b/drivers/pinctrl/pinctrl-falcon.c @@ -431,24 +431,28 @@ static int pinctrl_falcon_probe(struct platform_device *pdev) /* load and remap the pad resources of the different banks */ for_each_compatible_node(np, NULL, "lantiq,pad-falcon") { - struct platform_device *ppdev = of_find_device_by_node(np); const __be32 *bank = of_get_property(np, "lantiq,bank", NULL); struct resource res; + struct platform_device *ppdev; u32 avail; int pins; if (!of_device_is_available(np)) continue; - if (!ppdev) { - dev_err(&pdev->dev, "failed to find pad pdev\n"); - continue; - } if (!bank || *bank >= PORTS) continue; if (of_address_to_resource(np, 0, &res)) continue; + + ppdev = of_find_device_by_node(np); + if (!ppdev) { + dev_err(&pdev->dev, "failed to find pad pdev\n"); + continue; + } + falcon_info.clk[*bank] = clk_get(&ppdev->dev, NULL); + put_device(&ppdev->dev); if (IS_ERR(falcon_info.clk[*bank])) { dev_err(&ppdev->dev, "failed to get clock\n"); of_node_put(np); diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 621909b01debd7..033d142f0c2726 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -2052,7 +2052,7 @@ static inline bool ingenic_gpio_get_value(struct ingenic_gpio_chip *jzgc, static void ingenic_gpio_set_value(struct ingenic_gpio_chip *jzgc, u8 offset, int value) { - if (jzgc->jzpc->info->version >= ID_JZ4760) + if (jzgc->jzpc->info->version >= ID_JZ4770) ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_PAT0, offset, !!value); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_DATA, offset, !!value); @@ -2082,7 +2082,7 @@ static void irq_set_type(struct ingenic_gpio_chip *jzgc, break; } - if (jzgc->jzpc->info->version >= ID_JZ4760) { + if (jzgc->jzpc->info->version >= ID_JZ4770) { reg1 = 
JZ4760_GPIO_PAT1; reg2 = JZ4760_GPIO_PAT0; } else { @@ -2122,7 +2122,7 @@ static void ingenic_gpio_irq_enable(struct irq_data *irqd) struct ingenic_gpio_chip *jzgc = gpiochip_get_data(gc); int irq = irqd->hwirq; - if (jzgc->jzpc->info->version >= ID_JZ4760) + if (jzgc->jzpc->info->version >= ID_JZ4770) ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_INT, irq, true); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_SELECT, irq, true); @@ -2138,7 +2138,7 @@ static void ingenic_gpio_irq_disable(struct irq_data *irqd) ingenic_gpio_irq_mask(irqd); - if (jzgc->jzpc->info->version >= ID_JZ4760) + if (jzgc->jzpc->info->version >= ID_JZ4770) ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_INT, irq, false); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_SELECT, irq, false); @@ -2163,7 +2163,7 @@ static void ingenic_gpio_irq_ack(struct irq_data *irqd) irq_set_type(jzgc, irq, IRQ_TYPE_LEVEL_HIGH); } - if (jzgc->jzpc->info->version >= ID_JZ4760) + if (jzgc->jzpc->info->version >= ID_JZ4770) ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_FLAG, irq, false); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_DATA, irq, true); @@ -2220,7 +2220,7 @@ static void ingenic_gpio_irq_handler(struct irq_desc *desc) chained_irq_enter(irq_chip, desc); - if (jzgc->jzpc->info->version >= ID_JZ4760) + if (jzgc->jzpc->info->version >= ID_JZ4770) flag = ingenic_gpio_read_reg(jzgc, JZ4760_GPIO_FLAG); else flag = ingenic_gpio_read_reg(jzgc, JZ4740_GPIO_FLAG); @@ -2302,7 +2302,7 @@ static int ingenic_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) struct ingenic_pinctrl *jzpc = jzgc->jzpc; unsigned int pin = gc->base + offset; - if (jzpc->info->version >= ID_JZ4760) { + if (jzpc->info->version >= ID_JZ4770) { if (ingenic_get_pin_config(jzpc, pin, JZ4760_GPIO_INT) || ingenic_get_pin_config(jzpc, pin, JZ4760_GPIO_PAT1)) return GPIO_LINE_DIRECTION_IN; @@ -2360,7 +2360,7 @@ static int ingenic_pinmux_set_pin_fn(struct ingenic_pinctrl *jzpc, ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, func & 0x2); 
ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_PAT0, func & 0x1); ingenic_shadow_config_pin_load(jzpc, pin); - } else if (jzpc->info->version >= ID_JZ4760) { + } else if (jzpc->info->version >= ID_JZ4770) { ingenic_config_pin(jzpc, pin, JZ4760_GPIO_INT, false); ingenic_config_pin(jzpc, pin, GPIO_MSK, false); ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, func & 0x2); @@ -2368,7 +2368,7 @@ static int ingenic_pinmux_set_pin_fn(struct ingenic_pinctrl *jzpc, } else { ingenic_config_pin(jzpc, pin, JZ4740_GPIO_FUNC, true); ingenic_config_pin(jzpc, pin, JZ4740_GPIO_TRIG, func & 0x2); - ingenic_config_pin(jzpc, pin, JZ4740_GPIO_SELECT, func > 0); + ingenic_config_pin(jzpc, pin, JZ4740_GPIO_SELECT, func & 0x1); } return 0; @@ -2418,7 +2418,7 @@ static int ingenic_pinmux_gpio_set_direction(struct pinctrl_dev *pctldev, ingenic_shadow_config_pin(jzpc, pin, GPIO_MSK, true); ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, input); ingenic_shadow_config_pin_load(jzpc, pin); - } else if (jzpc->info->version >= ID_JZ4760) { + } else if (jzpc->info->version >= ID_JZ4770) { ingenic_config_pin(jzpc, pin, JZ4760_GPIO_INT, false); ingenic_config_pin(jzpc, pin, GPIO_MSK, true); ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, input); @@ -2448,7 +2448,7 @@ static int ingenic_pinconf_get(struct pinctrl_dev *pctldev, unsigned int offt = pin / PINS_PER_GPIO_CHIP; bool pull; - if (jzpc->info->version >= ID_JZ4760) + if (jzpc->info->version >= ID_JZ4770) pull = !ingenic_get_pin_config(jzpc, pin, JZ4760_GPIO_PEN); else pull = !ingenic_get_pin_config(jzpc, pin, JZ4740_GPIO_PULL_DIS); @@ -2498,7 +2498,7 @@ static void ingenic_set_bias(struct ingenic_pinctrl *jzpc, REG_SET(X1830_GPIO_PEH), bias << idxh); } - } else if (jzpc->info->version >= ID_JZ4760) { + } else if (jzpc->info->version >= ID_JZ4770) { ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PEN, !bias); } else { ingenic_config_pin(jzpc, pin, JZ4740_GPIO_PULL_DIS, !bias); @@ -2508,7 +2508,7 @@ static void ingenic_set_bias(struct 
ingenic_pinctrl *jzpc, static void ingenic_set_output_level(struct ingenic_pinctrl *jzpc, unsigned int pin, bool high) { - if (jzpc->info->version >= ID_JZ4760) + if (jzpc->info->version >= ID_JZ4770) ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT0, high); else ingenic_config_pin(jzpc, pin, JZ4740_GPIO_DATA, high); diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 77a25bdf0da70a..37526aa1fb2c4b 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -51,6 +51,7 @@ * @dual_edge_irqs: Bitmap of irqs that need sw emulated dual edge * detection. * @skip_wake_irqs: Skip IRQs that are handled by wakeup interrupt controller + * @disabled_for_mux: These IRQs were disabled because we muxed away. * @soc: Reference to soc_data of platform specific data. * @regs: Base addresses for the TLMM tiles. * @phys_base: Physical base address @@ -72,6 +73,7 @@ struct msm_pinctrl { DECLARE_BITMAP(dual_edge_irqs, MAX_NR_GPIO); DECLARE_BITMAP(enabled_irqs, MAX_NR_GPIO); DECLARE_BITMAP(skip_wake_irqs, MAX_NR_GPIO); + DECLARE_BITMAP(disabled_for_mux, MAX_NR_GPIO); const struct msm_pinctrl_soc_data *soc; void __iomem *regs[MAX_NR_TILES]; @@ -96,6 +98,14 @@ MSM_ACCESSOR(intr_cfg) MSM_ACCESSOR(intr_status) MSM_ACCESSOR(intr_target) +static void msm_ack_intr_status(struct msm_pinctrl *pctrl, + const struct msm_pingroup *g) +{ + u32 val = g->intr_ack_high ? 
BIT(g->intr_status_bit) : 0; + + msm_writel_intr_status(val, pctrl, g); +} + static int msm_get_groups_count(struct pinctrl_dev *pctldev) { struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); @@ -171,6 +181,10 @@ static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned group) { struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); + struct gpio_chip *gc = &pctrl->chip; + unsigned int irq = irq_find_mapping(gc->irq.domain, group); + struct irq_data *d = irq_get_irq_data(irq); + unsigned int gpio_func = pctrl->soc->gpio_func; const struct msm_pingroup *g; unsigned long flags; u32 val, mask; @@ -187,6 +201,20 @@ static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, if (WARN_ON(i == g->nfuncs)) return -EINVAL; + /* + * If an GPIO interrupt is setup on this pin then we need special + * handling. Specifically interrupt detection logic will still see + * the pin twiddle even when we're muxed away. + * + * When we see a pin with an interrupt setup on it then we'll disable + * (mask) interrupts on it when we mux away until we mux back. Note + * that disable_irq() refcounts and interrupts are disabled as long as + * at least one disable_irq() has been called. + */ + if (d && i != gpio_func && + !test_and_set_bit(d->hwirq, pctrl->disabled_for_mux)) + disable_irq(irq); + raw_spin_lock_irqsave(&pctrl->lock, flags); val = msm_readl_ctl(pctrl, g); @@ -196,6 +224,20 @@ static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, raw_spin_unlock_irqrestore(&pctrl->lock, flags); + if (d && i == gpio_func && + test_and_clear_bit(d->hwirq, pctrl->disabled_for_mux)) { + /* + * Clear interrupts detected while not GPIO since we only + * masked things. 
+ */ + if (d->parent_data && test_bit(d->hwirq, pctrl->skip_wake_irqs)) + irq_chip_set_parent_state(d, IRQCHIP_STATE_PENDING, false); + else + msm_ack_intr_status(pctrl, g); + + enable_irq(irq); + } + return 0; } @@ -210,8 +252,7 @@ static int msm_pinmux_request_gpio(struct pinctrl_dev *pctldev, if (!g->nfuncs) return 0; - /* For now assume function 0 is GPIO because it always is */ - return msm_pinmux_set_mux(pctldev, g->funcs[0], offset); + return msm_pinmux_set_mux(pctldev, g->funcs[pctrl->soc->gpio_func], offset); } static const struct pinmux_ops msm_pinmux_ops = { @@ -774,7 +815,7 @@ static void msm_gpio_irq_mask(struct irq_data *d) raw_spin_unlock_irqrestore(&pctrl->lock, flags); } -static void msm_gpio_irq_clear_unmask(struct irq_data *d, bool status_clear) +static void msm_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct msm_pinctrl *pctrl = gpiochip_get_data(gc); @@ -792,17 +833,6 @@ static void msm_gpio_irq_clear_unmask(struct irq_data *d, bool status_clear) raw_spin_lock_irqsave(&pctrl->lock, flags); - if (status_clear) { - /* - * clear the interrupt status bit before unmask to avoid - * any erroneous interrupts that would have got latched - * when the interrupt is not in use. 
- */ - val = msm_readl_intr_status(pctrl, g); - val &= ~BIT(g->intr_status_bit); - msm_writel_intr_status(val, pctrl, g); - } - val = msm_readl_intr_cfg(pctrl, g); val |= BIT(g->intr_raw_status_bit); val |= BIT(g->intr_enable_bit); @@ -822,7 +852,7 @@ static void msm_gpio_irq_enable(struct irq_data *d) irq_chip_enable_parent(d); if (!test_bit(d->hwirq, pctrl->skip_wake_irqs)) - msm_gpio_irq_clear_unmask(d, true); + msm_gpio_irq_unmask(d); } static void msm_gpio_irq_disable(struct irq_data *d) @@ -837,11 +867,6 @@ static void msm_gpio_irq_disable(struct irq_data *d) msm_gpio_irq_mask(d); } -static void msm_gpio_irq_unmask(struct irq_data *d) -{ - msm_gpio_irq_clear_unmask(d, false); -} - /** * msm_gpio_update_dual_edge_parent() - Prime next edge for IRQs handled by parent. * @d: The irq dta. @@ -894,7 +919,6 @@ static void msm_gpio_irq_ack(struct irq_data *d) struct msm_pinctrl *pctrl = gpiochip_get_data(gc); const struct msm_pingroup *g; unsigned long flags; - u32 val; if (test_bit(d->hwirq, pctrl->skip_wake_irqs)) { if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) @@ -906,12 +930,7 @@ static void msm_gpio_irq_ack(struct irq_data *d) raw_spin_lock_irqsave(&pctrl->lock, flags); - val = msm_readl_intr_status(pctrl, g); - if (g->intr_ack_high) - val |= BIT(g->intr_status_bit); - else - val &= ~BIT(g->intr_status_bit); - msm_writel_intr_status(val, pctrl, g); + msm_ack_intr_status(pctrl, g); if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) msm_gpio_update_dual_edge_pos(pctrl, g, d); @@ -936,6 +955,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) struct msm_pinctrl *pctrl = gpiochip_get_data(gc); const struct msm_pingroup *g; unsigned long flags; + bool was_enabled; u32 val; if (msm_gpio_needs_dual_edge_parent_workaround(d, type)) { @@ -997,6 +1017,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) * could cause the INTR_STATUS to be set for EDGE interrupts. 
*/ val = msm_readl_intr_cfg(pctrl, g); + was_enabled = val & BIT(g->intr_raw_status_bit); val |= BIT(g->intr_raw_status_bit); if (g->intr_detection_width == 2) { val &= ~(3 << g->intr_detection_bit); @@ -1046,6 +1067,14 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) } msm_writel_intr_cfg(val, pctrl, g); + /* + * The first time we set RAW_STATUS_EN it could trigger an interrupt. + * Clear the interrupt. This is safe because we have + * IRQCHIP_SET_TYPE_MASKED. + */ + if (!was_enabled) + msm_ack_intr_status(pctrl, g); + if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) msm_gpio_update_dual_edge_pos(pctrl, g, d); @@ -1099,16 +1128,11 @@ static int msm_gpio_irq_reqres(struct irq_data *d) } /* - * Clear the interrupt that may be pending before we enable - * the line. - * This is especially a problem with the GPIOs routed to the - * PDC. These GPIOs are direct-connect interrupts to the GIC. - * Disabling the interrupt line at the PDC does not prevent - * the interrupt from being latched at the GIC. The state at - * GIC needs to be cleared before enabling. + * The disable / clear-enable workaround we do in msm_pinmux_set_mux() + * only works if disable is not lazy since we only clear any bogus + * interrupt in hardware. Explicitly mark the interrupt as UNLAZY. */ - if (d->parent_data && test_bit(d->hwirq, pctrl->skip_wake_irqs)) - irq_chip_set_parent_state(d, IRQCHIP_STATE_PENDING, 0); + irq_set_status_flags(d->irq, IRQ_DISABLE_UNLAZY); return 0; out: diff --git a/drivers/pinctrl/qcom/pinctrl-msm.h b/drivers/pinctrl/qcom/pinctrl-msm.h index 333f99243c43ac..e31a5167c91ec9 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.h +++ b/drivers/pinctrl/qcom/pinctrl-msm.h @@ -118,6 +118,7 @@ struct msm_gpio_wakeirq_map { * @wakeirq_dual_edge_errata: If true then GPIOs using the wakeirq_map need * to be aware that their parent can't handle dual * edge interrupts. + * @gpio_func: Which function number is GPIO (usually 0). 
*/ struct msm_pinctrl_soc_data { const struct pinctrl_pin_desc *pins; @@ -134,6 +135,7 @@ struct msm_pinctrl_soc_data { const struct msm_gpio_wakeirq_map *wakeirq_map; unsigned int nwakeirq_map; bool wakeirq_dual_edge_errata; + unsigned int gpio_func; }; extern const struct dev_pm_ops msm_pinctrl_dev_pm_ops; diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-a100.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-a100.c index 19cfd1e76ee2c9..e69f6da40dc0a2 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun50i-a100.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-a100.c @@ -677,7 +677,7 @@ static const struct sunxi_desc_pin a100_pins[] = { SUNXI_FUNCTION_IRQ_BANK(0x6, 6, 19)), }; -static const unsigned int a100_irq_bank_map[] = { 0, 1, 2, 3, 4, 5, 6}; +static const unsigned int a100_irq_bank_map[] = { 1, 2, 3, 4, 5, 6, 7}; static const struct sunxi_pinctrl_desc a100_pinctrl_data = { .pins = a100_pins, diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 8e792f8e2dc9ad..e42a3a0005a72c 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -1142,20 +1142,22 @@ static void sunxi_pinctrl_irq_handler(struct irq_desc *desc) if (bank == pctl->desc->irq_banks) return; + chained_irq_enter(chip, desc); + reg = sunxi_irq_status_reg_from_bank(pctl->desc, bank); val = readl(pctl->membase + reg); if (val) { int irqoffset; - chained_irq_enter(chip, desc); for_each_set_bit(irqoffset, &val, IRQ_PER_BANK) { int pin_irq = irq_find_mapping(pctl->domain, bank * IRQ_PER_BANK + irqoffset); generic_handle_irq(pin_irq); } - chained_irq_exit(chip, desc); } + + chained_irq_exit(chip, desc); } static int sunxi_pinctrl_add_function(struct sunxi_pinctrl *pctl, diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c index dfa1f816a45f40..f9df218fc2bbe2 100644 --- a/drivers/platform/chrome/cros_ec_spi.c +++ b/drivers/platform/chrome/cros_ec_spi.c @@ -742,7 +742,6 @@ static int 
cros_ec_spi_probe(struct spi_device *spi) int err; spi->bits_per_word = 8; - spi->mode = SPI_MODE_0; spi->rt = true; err = spi_setup(spi); if (err < 0) diff --git a/drivers/platform/x86/dell-smbios-base.c b/drivers/platform/x86/dell-smbios-base.c index 2e2cd565926aac..3a1dbf19944138 100644 --- a/drivers/platform/x86/dell-smbios-base.c +++ b/drivers/platform/x86/dell-smbios-base.c @@ -594,6 +594,7 @@ static int __init dell_smbios_init(void) if (wmi && smm) { pr_err("No SMBIOS backends available (wmi: %d, smm: %d)\n", wmi, smm); + ret = -ENODEV; goto fail_create_group; } diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index ecd477964d117a..18bf8aeb5f870c 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -247,7 +247,8 @@ static int hp_wmi_perform_query(int query, enum hp_wmi_command command, ret = bios_return->return_code; if (ret) { - if (ret != HPWMI_RET_UNKNOWN_CMDTYPE) + if (ret != HPWMI_RET_UNKNOWN_COMMAND && + ret != HPWMI_RET_UNKNOWN_CMDTYPE) pr_warn("query 0x%x returned error 0x%x\n", query, ret); goto out_free; } diff --git a/drivers/platform/x86/i2c-multi-instantiate.c b/drivers/platform/x86/i2c-multi-instantiate.c index 6acc8457866e1b..d3b5afbe4833e9 100644 --- a/drivers/platform/x86/i2c-multi-instantiate.c +++ b/drivers/platform/x86/i2c-multi-instantiate.c @@ -166,13 +166,29 @@ static const struct i2c_inst_data bsg2150_data[] = { {} }; -static const struct i2c_inst_data int3515_data[] = { - { "tps6598x", IRQ_RESOURCE_APIC, 0 }, - { "tps6598x", IRQ_RESOURCE_APIC, 1 }, - { "tps6598x", IRQ_RESOURCE_APIC, 2 }, - { "tps6598x", IRQ_RESOURCE_APIC, 3 }, - {} -}; +/* + * Device with _HID INT3515 (TI PD controllers) has some unresolved interrupt + * issues. The most common problem seen is interrupt flood. + * + * There are at least two known causes. Firstly, on some boards, the + * I2CSerialBus resource index does not match the Interrupt resource, i.e. 
they + * are not one-to-one mapped like in the array below. Secondly, on some boards + * the IRQ line from the PD controller is not actually connected at all. But the + * interrupt flood is also seen on some boards where those are not a problem, so + * there are some other problems as well. + * + * Because of the issues with the interrupt, the device is disabled for now. If + * you wish to debug the issues, uncomment the below, and add an entry for the + * INT3515 device to the i2c_multi_instance_ids table. + * + * static const struct i2c_inst_data int3515_data[] = { + * { "tps6598x", IRQ_RESOURCE_APIC, 0 }, + * { "tps6598x", IRQ_RESOURCE_APIC, 1 }, + * { "tps6598x", IRQ_RESOURCE_APIC, 2 }, + * { "tps6598x", IRQ_RESOURCE_APIC, 3 }, + * { } + * }; + */ /* * Note new device-ids must also be added to i2c_multi_instantiate_ids in @@ -181,7 +197,6 @@ static const struct i2c_inst_data int3515_data[] = { static const struct acpi_device_id i2c_multi_inst_acpi_ids[] = { { "BSG1160", (unsigned long)bsg1160_data }, { "BSG2150", (unsigned long)bsg2150_data }, - { "INT3515", (unsigned long)int3515_data }, { } }; MODULE_DEVICE_TABLE(acpi, i2c_multi_inst_acpi_ids); diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 7598cd46cf606d..5b81bafa5c1650 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -92,6 +92,7 @@ struct ideapad_private { struct dentry *debug; unsigned long cfg; bool has_hw_rfkill_switch; + bool has_touchpad_switch; const char *fnesc_guid; }; @@ -535,7 +536,9 @@ static umode_t ideapad_is_visible(struct kobject *kobj, } else if (attr == &dev_attr_fn_lock.attr) { supported = acpi_has_method(priv->adev->handle, "HALS") && acpi_has_method(priv->adev->handle, "SALS"); - } else + } else if (attr == &dev_attr_touchpad.attr) + supported = priv->has_touchpad_switch; + else supported = true; return supported ? 
attr->mode : 0; @@ -867,6 +870,9 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv) { unsigned long value; + if (!priv->has_touchpad_switch) + return; + /* Without reading from EC touchpad LED doesn't switch state */ if (!read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value)) { /* Some IdeaPads don't really turn off touchpad - they only @@ -989,6 +995,9 @@ static int ideapad_acpi_add(struct platform_device *pdev) priv->platform_device = pdev; priv->has_hw_rfkill_switch = dmi_check_system(hw_rfkill_list); + /* Most ideapads with ELAN0634 touchpad don't use EC touchpad switch */ + priv->has_touchpad_switch = !acpi_dev_present("ELAN0634", NULL, -1); + ret = ideapad_sysfs_init(priv); if (ret) return ret; @@ -1006,6 +1015,10 @@ static int ideapad_acpi_add(struct platform_device *pdev) if (!priv->has_hw_rfkill_switch) write_ec_cmd(priv->adev->handle, VPCCMD_W_RF, 1); + /* The same for Touchpad */ + if (!priv->has_touchpad_switch) + write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, 1); + for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) if (test_bit(ideapad_rfk_data[i].cfgbit, &priv->cfg)) ideapad_register_rfkill(priv, i); diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index 0419c8001fe33f..30a9062d2b4b88 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -15,9 +15,13 @@ #include #include +/* Returned when NOT in tablet mode on some HP Stream x360 11 models */ +#define VGBS_TABLET_MODE_FLAG_ALT 0x10 /* When NOT in tablet mode, VGBS returns with the flag 0x40 */ -#define TABLET_MODE_FLAG 0x40 -#define DOCK_MODE_FLAG 0x80 +#define VGBS_TABLET_MODE_FLAG 0x40 +#define VGBS_DOCK_MODE_FLAG 0x80 + +#define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | VGBS_TABLET_MODE_FLAG_ALT) MODULE_LICENSE("GPL"); MODULE_AUTHOR("AceLan Kao"); @@ -72,9 +76,9 @@ static void detect_tablet_mode(struct platform_device *device) if (ACPI_FAILURE(status)) return; - m = !(vgbs & TABLET_MODE_FLAG); + m 
= !(vgbs & VGBS_TABLET_MODE_FLAGS); input_report_switch(priv->input_dev, SW_TABLET_MODE, m); - m = (vgbs & DOCK_MODE_FLAG) ? 1 : 0; + m = (vgbs & VGBS_DOCK_MODE_FLAG) ? 1 : 0; input_report_switch(priv->input_dev, SW_DOCK, m); } @@ -203,13 +207,19 @@ static const struct dmi_system_id dmi_switches_allow_list[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Stream x360 Convertible PC 11"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion 13 x360 PC"), }, }, { .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion 13 x360 PC"), + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Switch SA5-271"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7352"), }, }, {} /* Array terminator */ diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 986ad3dda1c108..8bce3da32a42b6 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -319,15 +319,6 @@ static struct i2c_mux_reg_platform_data mlxplat_extended_mux_data[] = { }; /* Platform hotplug devices */ -static struct i2c_board_info mlxplat_mlxcpld_psu[] = { - { - I2C_BOARD_INFO("24c02", 0x51), - }, - { - I2C_BOARD_INFO("24c02", 0x50), - }, -}; - static struct i2c_board_info mlxplat_mlxcpld_pwr[] = { { I2C_BOARD_INFO("dps460", 0x59), @@ -383,15 +374,13 @@ static struct mlxreg_core_data mlxplat_mlxcpld_default_psu_items_data[] = { .label = "psu1", .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET, .mask = BIT(0), - .hpdev.brdinfo = &mlxplat_mlxcpld_psu[0], - .hpdev.nr = MLXPLAT_CPLD_PSU_DEFAULT_NR, + .hpdev.nr = MLXPLAT_CPLD_NR_NONE, }, { .label = "psu2", .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET, .mask = BIT(1), - .hpdev.brdinfo = &mlxplat_mlxcpld_psu[1], - .hpdev.nr = MLXPLAT_CPLD_PSU_DEFAULT_NR, + .hpdev.nr = MLXPLAT_CPLD_NR_NONE, }, }; @@ -458,7 +447,7 @@ static struct mlxreg_core_item 
mlxplat_mlxcpld_default_items[] = { .aggr_mask = MLXPLAT_CPLD_AGGR_PSU_MASK_DEF, .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET, .mask = MLXPLAT_CPLD_PSU_MASK, - .count = ARRAY_SIZE(mlxplat_mlxcpld_psu), + .count = ARRAY_SIZE(mlxplat_mlxcpld_default_psu_items_data), .inversed = 1, .health = false, }, @@ -467,7 +456,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_default_items[] = { .aggr_mask = MLXPLAT_CPLD_AGGR_PWR_MASK_DEF, .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET, .mask = MLXPLAT_CPLD_PWR_MASK, - .count = ARRAY_SIZE(mlxplat_mlxcpld_pwr), + .count = ARRAY_SIZE(mlxplat_mlxcpld_default_pwr_items_data), .inversed = 0, .health = false, }, @@ -476,7 +465,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_default_items[] = { .aggr_mask = MLXPLAT_CPLD_AGGR_FAN_MASK_DEF, .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET, .mask = MLXPLAT_CPLD_FAN_MASK, - .count = ARRAY_SIZE(mlxplat_mlxcpld_fan), + .count = ARRAY_SIZE(mlxplat_mlxcpld_default_fan_items_data), .inversed = 1, .health = false, }, @@ -497,7 +486,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_comex_items[] = { .aggr_mask = MLXPLAT_CPLD_AGGR_MASK_CARRIER, .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET, .mask = MLXPLAT_CPLD_PSU_MASK, - .count = ARRAY_SIZE(mlxplat_mlxcpld_psu), + .count = ARRAY_SIZE(mlxplat_mlxcpld_default_psu_items_data), .inversed = 1, .health = false, }, @@ -506,7 +495,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_comex_items[] = { .aggr_mask = MLXPLAT_CPLD_AGGR_MASK_CARRIER, .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET, .mask = MLXPLAT_CPLD_PWR_MASK, - .count = ARRAY_SIZE(mlxplat_mlxcpld_pwr), + .count = ARRAY_SIZE(mlxplat_mlxcpld_default_pwr_items_data), .inversed = 0, .health = false, }, @@ -515,7 +504,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_comex_items[] = { .aggr_mask = MLXPLAT_CPLD_AGGR_MASK_CARRIER, .reg = MLXPLAT_CPLD_LPC_REG_FAN_OFFSET, .mask = MLXPLAT_CPLD_FAN_MASK, - .count = ARRAY_SIZE(mlxplat_mlxcpld_fan), + .count = ARRAY_SIZE(mlxplat_mlxcpld_default_fan_items_data), .inversed = 1, 
.health = false, }, @@ -603,15 +592,13 @@ static struct mlxreg_core_data mlxplat_mlxcpld_msn274x_psu_items_data[] = { .label = "psu1", .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET, .mask = BIT(0), - .hpdev.brdinfo = &mlxplat_mlxcpld_psu[0], - .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR, + .hpdev.nr = MLXPLAT_CPLD_NR_NONE, }, { .label = "psu2", .reg = MLXPLAT_CPLD_LPC_REG_PSU_OFFSET, .mask = BIT(1), - .hpdev.brdinfo = &mlxplat_mlxcpld_psu[1], - .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR, + .hpdev.nr = MLXPLAT_CPLD_NR_NONE, }, }; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index c404706379d92a..69402758b99c3c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8782,6 +8782,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '1', 'T', TPACPI_FAN_2CTL), /* P71 */ TPACPI_Q_LNV3('N', '1', 'U', TPACPI_FAN_2CTL), /* P51 */ TPACPI_Q_LNV3('N', '2', 'C', TPACPI_FAN_2CTL), /* P52 / P72 */ + TPACPI_Q_LNV3('N', '2', 'N', TPACPI_FAN_2CTL), /* P53 / P73 */ TPACPI_Q_LNV3('N', '2', 'E', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (1st gen) */ TPACPI_Q_LNV3('N', '2', 'O', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (2nd gen) */ TPACPI_Q_LNV3('N', '2', 'V', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (3nd gen) */ diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 5783139d0a1198..c4de932302d6b5 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -263,6 +263,16 @@ static const struct ts_dmi_data digma_citi_e200_data = { .properties = digma_citi_e200_props, }; +static const struct property_entry estar_beauty_hd_props[] = { + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + { } +}; + +static const struct ts_dmi_data estar_beauty_hd_data = { + .acpi_name = "GDIX1001:00", + .properties = estar_beauty_hd_props, +}; + static const struct property_entry gp_electronic_t701_props[] = { 
PROPERTY_ENTRY_U32("touchscreen-size-x", 960), PROPERTY_ENTRY_U32("touchscreen-size-y", 640), @@ -942,6 +952,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), }, }, + { + /* Estar Beauty HD (MID 7316R) */ + .driver_data = (void *)&estar_beauty_hd_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Estar"), + DMI_MATCH(DMI_PRODUCT_NAME, "eSTAR BEAUTY HD Intel Quad core"), + }, + }, { /* GP-electronic T701 */ .driver_data = (void *)&gp_electronic_t701_data, diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index 9d981b76c1e720..a4df1ea923864d 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -548,14 +548,15 @@ static irqreturn_t axp288_charger_irq_thread_handler(int irq, void *dev) /* * The HP Pavilion x2 10 series comes in a number of variants: - * Bay Trail SoC + AXP288 PMIC, DMI_BOARD_NAME: "815D" - * Cherry Trail SoC + AXP288 PMIC, DMI_BOARD_NAME: "813E" - * Cherry Trail SoC + TI PMIC, DMI_BOARD_NAME: "827C" or "82F4" + * Bay Trail SoC + AXP288 PMIC, Micro-USB, DMI_BOARD_NAME: "8021" + * Bay Trail SoC + AXP288 PMIC, Type-C, DMI_BOARD_NAME: "815D" + * Cherry Trail SoC + AXP288 PMIC, Type-C, DMI_BOARD_NAME: "813E" + * Cherry Trail SoC + TI PMIC, Type-C, DMI_BOARD_NAME: "827C" or "82F4" * - * The variants with the AXP288 PMIC are all kinds of special: + * The variants with the AXP288 + Type-C connector are all kinds of special: * - * 1. All variants use a Type-C connector which the AXP288 does not support, so - * when using a Type-C charger it is not recognized. Unlike most AXP288 devices, + * 1. They use a Type-C connector which the AXP288 does not support, so when + * using a Type-C charger it is not recognized. Unlike most AXP288 devices, * this model actually has mostly working ACPI AC / Battery code, the ACPI code * "solves" this by simply setting the input_current_limit to 3A. 
* There are still some issues with the ACPI code, so we use this native driver, @@ -578,12 +579,17 @@ static irqreturn_t axp288_charger_irq_thread_handler(int irq, void *dev) */ static const struct dmi_system_id axp288_hp_x2_dmi_ids[] = { { - /* - * Bay Trail model has "Hewlett-Packard" as sys_vendor, Cherry - * Trail model has "HP", so we only match on product_name. - */ .matches = { - DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "815D"), + }, + }, + { + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "813E"), }, }, {} /* Terminating entry */ diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index d14186525e1e90..845af0f44c022c 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -448,8 +448,10 @@ static ssize_t bq24190_sysfs_show(struct device *dev, return -EINVAL; ret = pm_runtime_get_sync(bdi->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(bdi->dev); return ret; + } ret = bq24190_read_mask(bdi, info->reg, info->mask, info->shift, &v); if (ret) @@ -1077,8 +1079,10 @@ static int bq24190_charger_get_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); ret = pm_runtime_get_sync(bdi->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(bdi->dev); return ret; + } switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: @@ -1149,8 +1153,10 @@ static int bq24190_charger_set_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); ret = pm_runtime_get_sync(bdi->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(bdi->dev); return ret; + } switch (psp) { case POWER_SUPPLY_PROP_ONLINE: @@ -1410,8 +1416,10 @@ static int 
bq24190_battery_get_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); ret = pm_runtime_get_sync(bdi->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(bdi->dev); return ret; + } switch (psp) { case POWER_SUPPLY_PROP_STATUS: @@ -1456,8 +1464,10 @@ static int bq24190_battery_set_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); ret = pm_runtime_get_sync(bdi->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(bdi->dev); return ret; + } switch (psp) { case POWER_SUPPLY_PROP_ONLINE: diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 34c21c51bac10e..945c3257ca9317 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -299,7 +299,7 @@ static const union { /* TODO: BQ25896 has max ICHG 3008 mA */ [TBL_ICHG] = { .rt = {0, 5056000, 64000} }, /* uA */ [TBL_ITERM] = { .rt = {64000, 1024000, 64000} }, /* uA */ - [TBL_IILIM] = { .rt = {50000, 3200000, 50000} }, /* uA */ + [TBL_IILIM] = { .rt = {100000, 3250000, 50000} }, /* uA */ [TBL_VREG] = { .rt = {3840000, 4608000, 16000} }, /* uV */ [TBL_BOOSTV] = { .rt = {4550000, 5510000, 64000} }, /* uV */ [TBL_SYSVMIN] = { .rt = {3000000, 3700000, 100000} }, /* uV */ diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c index f284547913d6fd..2e9672fe4df1f2 100644 --- a/drivers/power/supply/max17042_battery.c +++ b/drivers/power/supply/max17042_battery.c @@ -85,9 +85,10 @@ static enum power_supply_property max17042_battery_props[] = { POWER_SUPPLY_PROP_TEMP_MAX, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_SCOPE, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, + // these two have to be at the end on the list POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_CURRENT_AVG, - POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, }; static int max17042_get_temperature(struct max17042_chip *chip, int *temp) diff --git a/drivers/ps3/ps3stor_lib.c 
b/drivers/ps3/ps3stor_lib.c index 333ba83006e48d..a12a1ad9b5fe35 100644 --- a/drivers/ps3/ps3stor_lib.c +++ b/drivers/ps3/ps3stor_lib.c @@ -189,7 +189,7 @@ int ps3stor_setup(struct ps3_storage_device *dev, irq_handler_t handler) dev->bounce_lpar = ps3_mm_phys_to_lpar(__pa(dev->bounce_buf)); dev->bounce_dma = dma_map_single(&dev->sbd.core, dev->bounce_buf, dev->bounce_size, DMA_BIDIRECTIONAL); - if (!dev->bounce_dma) { + if (dma_mapping_error(&dev->sbd.core, dev->bounce_dma)) { dev_err(&dev->sbd.core, "%s:%u: map DMA region failed\n", __func__, __LINE__); error = -ENODEV; diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 942f72d8151dae..deb429a3dff1d3 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -64,6 +64,7 @@ config DP83640_PHY depends on NETWORK_PHY_TIMESTAMPING depends on PHYLIB depends on PTP_1588_CLOCK + select CRC32 help Supports the DP83640 PHYTER with IEEE 1588 features. @@ -78,6 +79,7 @@ config DP83640_PHY config PTP_1588_CLOCK_INES tristate "ZHAW InES PTP time stamping IP core" depends on NETWORK_PHY_TIMESTAMPING + depends on HAS_IOMEM depends on PHYLIB depends on PTP_1588_CLOCK help diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index c50d453552bd4f..86bcafd23e4f61 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -235,8 +235,9 @@ static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm, period_cycles /= prescale; c = clkrate * state->duty_cycle; - do_div(c, NSEC_PER_SEC * prescale); + do_div(c, NSEC_PER_SEC); duty_cycles = c; + duty_cycles /= prescale; /* * according to imx pwm RM, the real period value should be PERIOD diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c index 7551253ada32b7..bf3f14fb5f2443 100644 --- a/drivers/pwm/pwm-lp3943.c +++ b/drivers/pwm/pwm-lp3943.c @@ -275,6 +275,7 @@ static int lp3943_pwm_probe(struct platform_device *pdev) lp3943_pwm->chip.dev = &pdev->dev; lp3943_pwm->chip.ops = &lp3943_pwm_ops; lp3943_pwm->chip.npwm = 
LP3943_NUM_PWMS; + lp3943_pwm->chip.base = -1; platform_set_drvdata(pdev, lp3943_pwm); diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index 38a4c5c1317b24..482d5b9cec1fb4 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -294,12 +294,8 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, ctrl |= BIT_CH(PWM_CLK_GATING, pwm->hwpwm); - if (state->enabled) { + if (state->enabled) ctrl |= BIT_CH(PWM_EN, pwm->hwpwm); - } else { - ctrl &= ~BIT_CH(PWM_EN, pwm->hwpwm); - ctrl &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm); - } sun4i_pwm_writel(sun4i_pwm, ctrl, PWM_CTRL_REG); diff --git a/drivers/pwm/pwm-zx.c b/drivers/pwm/pwm-zx.c index e2c21cc34a96aa..3763ce5311ac2b 100644 --- a/drivers/pwm/pwm-zx.c +++ b/drivers/pwm/pwm-zx.c @@ -238,6 +238,7 @@ static int zx_pwm_probe(struct platform_device *pdev) ret = pwmchip_add(&zpc->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); + clk_disable_unprepare(zpc->pclk); return ret; } diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index cd1224182ad743..90cb8445f72167 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -594,7 +594,7 @@ static const struct regulator_desc axp22x_regulators[] = { AXP22X_DLDO1_V_OUT, AXP22X_DLDO1_V_OUT_MASK, AXP22X_PWR_OUT_CTRL2, AXP22X_PWR_OUT_DLDO1_MASK), AXP_DESC(AXP22X, DLDO2, "dldo2", "dldoin", 700, 3300, 100, - AXP22X_DLDO2_V_OUT, AXP22X_PWR_OUT_DLDO2_MASK, + AXP22X_DLDO2_V_OUT, AXP22X_DLDO2_V_OUT_MASK, AXP22X_PWR_OUT_CTRL2, AXP22X_PWR_OUT_DLDO2_MASK), AXP_DESC(AXP22X, DLDO3, "dldo3", "dldoin", 700, 3300, 100, AXP22X_DLDO3_V_OUT, AXP22X_DLDO3_V_OUT_MASK, diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c index 0774467994fbe2..3333b8905f1b7d 100644 --- a/drivers/regulator/bd718x7-regulator.c +++ b/drivers/regulator/bd718x7-regulator.c @@ -15,6 +15,36 @@ #include #include +/* Typical regulator startup 
times as per data sheet in uS */ +#define BD71847_BUCK1_STARTUP_TIME 144 +#define BD71847_BUCK2_STARTUP_TIME 162 +#define BD71847_BUCK3_STARTUP_TIME 162 +#define BD71847_BUCK4_STARTUP_TIME 240 +#define BD71847_BUCK5_STARTUP_TIME 270 +#define BD71847_BUCK6_STARTUP_TIME 200 +#define BD71847_LDO1_STARTUP_TIME 440 +#define BD71847_LDO2_STARTUP_TIME 370 +#define BD71847_LDO3_STARTUP_TIME 310 +#define BD71847_LDO4_STARTUP_TIME 400 +#define BD71847_LDO5_STARTUP_TIME 530 +#define BD71847_LDO6_STARTUP_TIME 400 + +#define BD71837_BUCK1_STARTUP_TIME 160 +#define BD71837_BUCK2_STARTUP_TIME 180 +#define BD71837_BUCK3_STARTUP_TIME 180 +#define BD71837_BUCK4_STARTUP_TIME 180 +#define BD71837_BUCK5_STARTUP_TIME 160 +#define BD71837_BUCK6_STARTUP_TIME 240 +#define BD71837_BUCK7_STARTUP_TIME 220 +#define BD71837_BUCK8_STARTUP_TIME 200 +#define BD71837_LDO1_STARTUP_TIME 440 +#define BD71837_LDO2_STARTUP_TIME 370 +#define BD71837_LDO3_STARTUP_TIME 310 +#define BD71837_LDO4_STARTUP_TIME 400 +#define BD71837_LDO5_STARTUP_TIME 310 +#define BD71837_LDO6_STARTUP_TIME 400 +#define BD71837_LDO7_STARTUP_TIME 530 + /* * BD718(37/47/50) have two "enable control modes". ON/OFF can either be * controlled by software - or by PMIC internal HW state machine. 
Whether @@ -613,6 +643,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK1_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK1_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -646,6 +677,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK2_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK2_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -680,6 +712,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .linear_range_selectors = bd71847_buck3_volt_range_sel, .enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK3_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -706,6 +739,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_range_mask = BD71847_BUCK4_RANGE_MASK, .linear_range_selectors = bd71847_buck4_volt_range_sel, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK4_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -727,6 +761,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_3RD_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -750,6 +785,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_4TH_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -775,6 +811,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .linear_range_selectors = bd718xx_ldo1_volt_range_sel, .enable_reg = BD718XX_REG_LDO1_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = 
BD71847_LDO1_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -796,6 +833,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .n_voltages = ARRAY_SIZE(ldo_2_volts), .enable_reg = BD718XX_REG_LDO2_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO2_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -818,6 +856,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_LDO3_MASK, .enable_reg = BD718XX_REG_LDO3_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO3_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -840,6 +879,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_LDO4_MASK, .enable_reg = BD718XX_REG_LDO4_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO4_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -865,6 +905,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .linear_range_selectors = bd71847_ldo5_volt_range_sel, .enable_reg = BD718XX_REG_LDO5_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -889,6 +930,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_LDO6_MASK, .enable_reg = BD718XX_REG_LDO6_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -942,6 +984,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK1_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK1_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -975,6 +1018,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK2_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK2_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -1005,6 
+1049,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD71837_REG_BUCK3_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK3_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -1033,6 +1078,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD71837_REG_BUCK4_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK4_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -1065,6 +1111,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .linear_range_selectors = bd71837_buck5_volt_range_sel, .enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1088,6 +1135,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD71837_BUCK6_MASK, .enable_reg = BD718XX_REG_2ND_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1109,6 +1157,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_3RD_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK7_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1132,6 +1181,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_4TH_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK8_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1157,6 +1207,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .linear_range_selectors = bd718xx_ldo1_volt_range_sel, .enable_reg = BD718XX_REG_LDO1_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO1_STARTUP_TIME, .owner = THIS_MODULE, }, 
.init = { @@ -1178,6 +1229,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .n_voltages = ARRAY_SIZE(ldo_2_volts), .enable_reg = BD718XX_REG_LDO2_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO2_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1200,6 +1252,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_LDO3_MASK, .enable_reg = BD718XX_REG_LDO3_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO3_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1222,6 +1275,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_LDO4_MASK, .enable_reg = BD718XX_REG_LDO4_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO4_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1246,6 +1300,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD71837_LDO5_MASK, .enable_reg = BD718XX_REG_LDO5_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1272,6 +1327,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_LDO6_MASK, .enable_reg = BD718XX_REG_LDO6_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1296,6 +1352,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD71837_LDO7_MASK, .enable_reg = BD71837_REG_LDO7_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO7_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index d488325499a9f4..a22c4b5f64f7e3 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -726,7 +726,7 @@ static const struct rpmh_vreg_hw_data pmic5_ftsmps510 = { static const struct rpmh_vreg_hw_data pmic5_hfsmps515 = { .regulator_type = VRM, .ops = 
&rpmh_regulator_vrm_ops, - .voltage_range = REGULATOR_LINEAR_RANGE(2800000, 0, 4, 1600), + .voltage_range = REGULATOR_LINEAR_RANGE(2800000, 0, 4, 16000), .n_voltages = 5, .pmic_mode_map = pmic_mode_map_pmic5_smps, .of_map_mode = rpmh_regulator_pmic4_smps_of_map_mode, diff --git a/drivers/remoteproc/mtk_common.h b/drivers/remoteproc/mtk_common.h index 47b4561443a94c..f2bcc9d9fda65b 100644 --- a/drivers/remoteproc/mtk_common.h +++ b/drivers/remoteproc/mtk_common.h @@ -32,22 +32,22 @@ #define MT8183_SCP_CACHESIZE_8KB BIT(8) #define MT8183_SCP_CACHE_CON_WAYEN BIT(10) -#define MT8192_L2TCM_SRAM_PD_0 0x210C0 -#define MT8192_L2TCM_SRAM_PD_1 0x210C4 -#define MT8192_L2TCM_SRAM_PD_2 0x210C8 -#define MT8192_L1TCM_SRAM_PDN 0x2102C -#define MT8192_CPU0_SRAM_PD 0x21080 - -#define MT8192_SCP2APMCU_IPC_SET 0x24080 -#define MT8192_SCP2APMCU_IPC_CLR 0x24084 +#define MT8192_L2TCM_SRAM_PD_0 0x10C0 +#define MT8192_L2TCM_SRAM_PD_1 0x10C4 +#define MT8192_L2TCM_SRAM_PD_2 0x10C8 +#define MT8192_L1TCM_SRAM_PDN 0x102C +#define MT8192_CPU0_SRAM_PD 0x1080 + +#define MT8192_SCP2APMCU_IPC_SET 0x4080 +#define MT8192_SCP2APMCU_IPC_CLR 0x4084 #define MT8192_SCP_IPC_INT_BIT BIT(0) -#define MT8192_SCP2SPM_IPC_CLR 0x24094 -#define MT8192_GIPC_IN_SET 0x24098 +#define MT8192_SCP2SPM_IPC_CLR 0x4094 +#define MT8192_GIPC_IN_SET 0x4098 #define MT8192_HOST_IPC_INT_BIT BIT(0) -#define MT8192_CORE0_SW_RSTN_CLR 0x30000 -#define MT8192_CORE0_SW_RSTN_SET 0x30004 -#define MT8192_CORE0_WDT_CFG 0x30034 +#define MT8192_CORE0_SW_RSTN_CLR 0x10000 +#define MT8192_CORE0_SW_RSTN_SET 0x10004 +#define MT8192_CORE0_WDT_CFG 0x10034 #define SCP_FW_VER_LEN 32 #define SCP_SHARE_BUFFER_SIZE 288 diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c index 577cbd5d421ece..52fa01d67c18e7 100644 --- a/drivers/remoteproc/mtk_scp.c +++ b/drivers/remoteproc/mtk_scp.c @@ -350,9 +350,10 @@ static int scp_load(struct rproc *rproc, const struct firmware *fw) ret = scp->data->scp_before_load(scp); if (ret < 0) - return ret; 
+ goto leave; ret = scp_elf_load_segments(rproc, fw); +leave: clk_disable_unprepare(scp->clk); return ret; @@ -772,12 +773,14 @@ static const struct mtk_scp_of_data mt8192_of_data = { .host_to_scp_int_bit = MT8192_HOST_IPC_INT_BIT, }; +#if defined(CONFIG_OF) static const struct of_device_id mtk_scp_of_match[] = { { .compatible = "mediatek,mt8183-scp", .data = &mt8183_of_data }, { .compatible = "mediatek,mt8192-scp", .data = &mt8192_of_data }, {}, }; MODULE_DEVICE_TABLE(of, mtk_scp_of_match); +#endif static struct platform_driver mtk_scp_driver = { .probe = scp_probe, diff --git a/drivers/remoteproc/qcom_q6v5_adsp.c b/drivers/remoteproc/qcom_q6v5_adsp.c index efb2c1aa80a3c7..9eb599701f9b04 100644 --- a/drivers/remoteproc/qcom_q6v5_adsp.c +++ b/drivers/remoteproc/qcom_q6v5_adsp.c @@ -193,8 +193,10 @@ static int adsp_start(struct rproc *rproc) dev_pm_genpd_set_performance_state(adsp->dev, INT_MAX); ret = pm_runtime_get_sync(adsp->dev); - if (ret) + if (ret) { + pm_runtime_put_noidle(adsp->dev); goto disable_xo_clk; + } ret = clk_bulk_prepare_enable(adsp->num_clks, adsp->clks); if (ret) { @@ -362,15 +364,12 @@ static int adsp_init_mmio(struct qcom_adsp *adsp, struct platform_device *pdev) { struct device_node *syscon; - struct resource *res; int ret; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - adsp->qdsp6ss_base = devm_ioremap(&pdev->dev, res->start, - resource_size(res)); - if (!adsp->qdsp6ss_base) { + adsp->qdsp6ss_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(adsp->qdsp6ss_base)) { dev_err(adsp->dev, "failed to map QDSP6SS registers\n"); - return -ENOMEM; + return PTR_ERR(adsp->qdsp6ss_base); } syscon = of_parse_phandle(pdev->dev.of_node, "qcom,halt-regs", 0); diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c index eb3457a6c3b739..ba6f7551242de6 100644 --- a/drivers/remoteproc/qcom_q6v5_mss.c +++ b/drivers/remoteproc/qcom_q6v5_mss.c @@ -349,8 +349,11 @@ static int q6v5_pds_enable(struct q6v5 *qproc, 
struct device **pds, for (i = 0; i < pd_count; i++) { dev_pm_genpd_set_performance_state(pds[i], INT_MAX); ret = pm_runtime_get_sync(pds[i]); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(pds[i]); + dev_pm_genpd_set_performance_state(pds[i], 0); goto unroll_pd_votes; + } } return 0; diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 3837f23995e056..0678b417707ef1 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -90,8 +90,11 @@ static int adsp_pds_enable(struct qcom_adsp *adsp, struct device **pds, for (i = 0; i < pd_count; i++) { dev_pm_genpd_set_performance_state(pds[i], INT_MAX); ret = pm_runtime_get_sync(pds[i]); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(pds[i]); + dev_pm_genpd_set_performance_state(pds[i], 0); goto unroll_pd_votes; + } } return 0; diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c index 9eb2f6bccea630..b37b111b15b39e 100644 --- a/drivers/remoteproc/qcom_sysmon.c +++ b/drivers/remoteproc/qcom_sysmon.c @@ -22,6 +22,9 @@ struct qcom_sysmon { struct rproc_subdev subdev; struct rproc *rproc; + int state; + struct mutex state_lock; + struct list_head node; const char *name; @@ -448,7 +451,10 @@ static int sysmon_prepare(struct rproc_subdev *subdev) .ssr_event = SSCTL_SSR_EVENT_BEFORE_POWERUP }; + mutex_lock(&sysmon->state_lock); + sysmon->state = SSCTL_SSR_EVENT_BEFORE_POWERUP; blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event); + mutex_unlock(&sysmon->state_lock); return 0; } @@ -472,20 +478,25 @@ static int sysmon_start(struct rproc_subdev *subdev) .ssr_event = SSCTL_SSR_EVENT_AFTER_POWERUP }; + mutex_lock(&sysmon->state_lock); + sysmon->state = SSCTL_SSR_EVENT_AFTER_POWERUP; blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event); + mutex_unlock(&sysmon->state_lock); mutex_lock(&sysmon_lock); list_for_each_entry(target, &sysmon_list, node) { - if (target == sysmon || - 
target->rproc->state != RPROC_RUNNING) + if (target == sysmon) continue; + mutex_lock(&target->state_lock); event.subsys_name = target->name; + event.ssr_event = target->state; if (sysmon->ssctl_version == 2) ssctl_send_event(sysmon, &event); else if (sysmon->ept) sysmon_send_event(sysmon, &event); + mutex_unlock(&target->state_lock); } mutex_unlock(&sysmon_lock); @@ -500,7 +511,10 @@ static void sysmon_stop(struct rproc_subdev *subdev, bool crashed) .ssr_event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN }; + mutex_lock(&sysmon->state_lock); + sysmon->state = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN; blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event); + mutex_unlock(&sysmon->state_lock); /* Don't request graceful shutdown if we've crashed */ if (crashed) @@ -521,7 +535,10 @@ static void sysmon_unprepare(struct rproc_subdev *subdev) .ssr_event = SSCTL_SSR_EVENT_AFTER_SHUTDOWN }; + mutex_lock(&sysmon->state_lock); + sysmon->state = SSCTL_SSR_EVENT_AFTER_SHUTDOWN; blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event); + mutex_unlock(&sysmon->state_lock); } /** @@ -534,11 +551,10 @@ static int sysmon_notify(struct notifier_block *nb, unsigned long event, void *data) { struct qcom_sysmon *sysmon = container_of(nb, struct qcom_sysmon, nb); - struct rproc *rproc = sysmon->rproc; struct sysmon_event *sysmon_event = data; /* Skip non-running rprocs and the originating instance */ - if (rproc->state != RPROC_RUNNING || + if (sysmon->state != SSCTL_SSR_EVENT_AFTER_POWERUP || !strcmp(sysmon_event->subsys_name, sysmon->name)) { dev_dbg(sysmon->dev, "not notifying %s\n", sysmon->name); return NOTIFY_DONE; @@ -591,6 +607,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc, init_completion(&sysmon->ind_comp); init_completion(&sysmon->shutdown_comp); mutex_init(&sysmon->lock); + mutex_init(&sysmon->state_lock); sysmon->shutdown_irq = of_irq_get_byname(sysmon->dev->of_node, "shutdown-ack"); diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c 
b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index 9011e477290ce5..863c0214e0a8e3 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -445,10 +445,10 @@ static int k3_dsp_rproc_of_get_memories(struct platform_device *pdev, kproc->mem[i].cpu_addr = devm_ioremap_wc(dev, res->start, resource_size(res)); - if (IS_ERR(kproc->mem[i].cpu_addr)) { + if (!kproc->mem[i].cpu_addr) { dev_err(dev, "failed to map %s memory\n", data->mems[i].name); - return PTR_ERR(kproc->mem[i].cpu_addr); + return -ENOMEM; } kproc->mem[i].bus_addr = res->start; kproc->mem[i].dev_addr = data->mems[i].dev_addr; diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 8ec9ea1ca72e19..6f90b85a58140f 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -33,7 +33,7 @@ struct ep93xx_rtc { static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload, unsigned short *delete) { - struct ep93xx_rtc *ep93xx_rtc = dev_get_platdata(dev); + struct ep93xx_rtc *ep93xx_rtc = dev_get_drvdata(dev); unsigned long comp; comp = readl(ep93xx_rtc->mmio_base + EP93XX_RTC_SWCOMP); @@ -51,7 +51,7 @@ static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload, static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm) { - struct ep93xx_rtc *ep93xx_rtc = dev_get_platdata(dev); + struct ep93xx_rtc *ep93xx_rtc = dev_get_drvdata(dev); unsigned long time; time = readl(ep93xx_rtc->mmio_base + EP93XX_RTC_DATA); @@ -62,7 +62,7 @@ static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm) static int ep93xx_rtc_set_time(struct device *dev, struct rtc_time *tm) { - struct ep93xx_rtc *ep93xx_rtc = dev_get_platdata(dev); + struct ep93xx_rtc *ep93xx_rtc = dev_get_drvdata(dev); unsigned long secs = rtc_tm_to_time64(tm); writel(secs + 1, ep93xx_rtc->mmio_base + EP93XX_RTC_LOAD); diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 07a5630ec841fc..f0a6861ff3aef3 100644 --- 
a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -243,10 +243,8 @@ static int pcf2127_nvmem_read(void *priv, unsigned int offset, if (ret) return ret; - ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_RAM_RD_CMD, - val, bytes); - - return ret ?: bytes; + return regmap_bulk_read(pcf2127->regmap, PCF2127_REG_RAM_RD_CMD, + val, bytes); } static int pcf2127_nvmem_write(void *priv, unsigned int offset, @@ -261,10 +259,8 @@ static int pcf2127_nvmem_write(void *priv, unsigned int offset, if (ret) return ret; - ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_WRT_CMD, - val, bytes); - - return ret ?: bytes; + return regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_WRT_CMD, + val, bytes); } /* watchdog driver */ @@ -335,6 +331,37 @@ static const struct watchdog_ops pcf2127_watchdog_ops = { .set_timeout = pcf2127_wdt_set_timeout, }; +static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127) +{ + u32 wdd_timeout; + int ret; + + if (!IS_ENABLED(CONFIG_WATCHDOG) || + !device_property_read_bool(dev, "reset-source")) + return 0; + + pcf2127->wdd.parent = dev; + pcf2127->wdd.info = &pcf2127_wdt_info; + pcf2127->wdd.ops = &pcf2127_watchdog_ops; + pcf2127->wdd.min_timeout = PCF2127_WD_VAL_MIN; + pcf2127->wdd.max_timeout = PCF2127_WD_VAL_MAX; + pcf2127->wdd.timeout = PCF2127_WD_VAL_DEFAULT; + pcf2127->wdd.min_hw_heartbeat_ms = 500; + pcf2127->wdd.status = WATCHDOG_NOWAYOUT_INIT_STATUS; + + watchdog_set_drvdata(&pcf2127->wdd, pcf2127); + + /* Test if watchdog timer is started by bootloader */ + ret = regmap_read(pcf2127->regmap, PCF2127_REG_WD_VAL, &wdd_timeout); + if (ret) + return ret; + + if (wdd_timeout) + set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status); + + return devm_watchdog_register_device(dev, &pcf2127->wdd); +} + /* Alarm */ static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) { @@ -536,7 +563,6 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, int alarm_irq, const char *name, bool 
has_nvmem) { struct pcf2127 *pcf2127; - u32 wdd_timeout; int ret = 0; dev_dbg(dev, "%s\n", __func__); @@ -575,17 +601,6 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, pcf2127->rtc->ops = &pcf2127_rtc_alrm_ops; } - pcf2127->wdd.parent = dev; - pcf2127->wdd.info = &pcf2127_wdt_info; - pcf2127->wdd.ops = &pcf2127_watchdog_ops; - pcf2127->wdd.min_timeout = PCF2127_WD_VAL_MIN; - pcf2127->wdd.max_timeout = PCF2127_WD_VAL_MAX; - pcf2127->wdd.timeout = PCF2127_WD_VAL_DEFAULT; - pcf2127->wdd.min_hw_heartbeat_ms = 500; - pcf2127->wdd.status = WATCHDOG_NOWAYOUT_INIT_STATUS; - - watchdog_set_drvdata(&pcf2127->wdd, pcf2127); - if (has_nvmem) { struct nvmem_config nvmem_cfg = { .priv = pcf2127, @@ -615,19 +630,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, return ret; } - /* Test if watchdog timer is started by bootloader */ - ret = regmap_read(pcf2127->regmap, PCF2127_REG_WD_VAL, &wdd_timeout); - if (ret) - return ret; - - if (wdd_timeout) - set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status); - -#ifdef CONFIG_WATCHDOG - ret = devm_watchdog_register_device(dev, &pcf2127->wdd); - if (ret) - return ret; -#endif /* CONFIG_WATCHDOG */ + pcf2127_watchdog_init(dev, pcf2127); /* * Disable battery low/switch-over timestamp and interrupts. 
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index c6b89273feba81..d4b2ab7861266e 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -361,8 +361,10 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) device_init_wakeup(&adev->dev, true); ldata->rtc = devm_rtc_allocate_device(&adev->dev); - if (IS_ERR(ldata->rtc)) - return PTR_ERR(ldata->rtc); + if (IS_ERR(ldata->rtc)) { + ret = PTR_ERR(ldata->rtc); + goto out; + } ldata->rtc->ops = ops; ldata->rtc->range_min = vendor->range_min; diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index e2b8b150bcb448..f2818cdd11d82e 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -272,7 +272,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, 300000000); if (IS_ERR(rtc->int_osc)) { pr_crit("Couldn't register the internal oscillator\n"); - return; + goto err; } parents[0] = clk_hw_get_name(rtc->int_osc); @@ -290,7 +290,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, rtc->losc = clk_register(NULL, &rtc->hw); if (IS_ERR(rtc->losc)) { pr_crit("Couldn't register the LOSC clock\n"); - return; + goto err_register; } of_property_read_string_index(node, "clock-output-names", 1, @@ -301,7 +301,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, &rtc->lock); if (IS_ERR(rtc->ext_losc)) { pr_crit("Couldn't register the LOSC external gate\n"); - return; + goto err_register; } clk_data->num = 2; @@ -314,6 +314,8 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); return; +err_register: + clk_hw_unregister_fixed_rate(rtc->int_osc); err: kfree(clk_data); } diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 99f86612f7751a..dc78a523a69f21 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -256,7 +256,6 @@ void dasd_alias_disconnect_device_from_lcu(struct 
dasd_device *device) return; device->discipline->get_uid(device, &uid); spin_lock_irqsave(&lcu->lock, flags); - list_del_init(&device->alias_list); /* make sure that the workers don't use this device */ if (device == lcu->suc_data.device) { spin_unlock_irqrestore(&lcu->lock, flags); @@ -283,6 +282,7 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) spin_lock_irqsave(&aliastree.lock, flags); spin_lock(&lcu->lock); + list_del_init(&device->alias_list); if (list_empty(&lcu->grouplist) && list_empty(&lcu->active_devices) && list_empty(&lcu->inactive_devices)) { @@ -462,11 +462,19 @@ static int read_unit_address_configuration(struct dasd_device *device, spin_unlock_irqrestore(&lcu->lock, flags); rc = dasd_sleep_on(cqr); - if (rc && !suborder_not_supported(cqr)) { + if (!rc) + goto out; + + if (suborder_not_supported(cqr)) { + /* suborder not supported or device unusable for IO */ + rc = -EOPNOTSUPP; + } else { + /* IO failed but should be retried */ spin_lock_irqsave(&lcu->lock, flags); lcu->flags |= NEED_UAC_UPDATE; spin_unlock_irqrestore(&lcu->lock, flags); } +out: dasd_sfree_request(cqr, cqr->memdev); return rc; } @@ -503,6 +511,14 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) return rc; spin_lock_irqsave(&lcu->lock, flags); + /* + * there is another update needed skip the remaining handling + * the data might already be outdated + * but especially do not add the device to an LCU with pending + * update + */ + if (lcu->flags & NEED_UAC_UPDATE) + goto out; lcu->pav = NO_PAV; for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) { switch (lcu->uac->unit[i].ua_type) { @@ -521,6 +537,7 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) alias_list) { _add_device_to_lcu(lcu, device, refdev); } +out: spin_unlock_irqrestore(&lcu->lock, flags); return 0; } @@ -625,6 +642,7 @@ int dasd_alias_add_device(struct dasd_device *device) } if (lcu->flags & UPDATE_PENDING) { list_move(&device->alias_list, 
&lcu->active_devices); + private->pavgroup = NULL; _schedule_lcu_update(lcu, device); } spin_unlock_irqrestore(&lcu->lock, flags); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index b29fe8d50baf25..33280ca181e952 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1664,10 +1664,10 @@ void __init ccw_device_destroy_console(struct ccw_device *cdev) struct io_subchannel_private *io_priv = to_io_private(sch); set_io_private(sch, NULL); - put_device(&sch->dev); - put_device(&cdev->dev); dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area), io_priv->dma_area, io_priv->dma_area_dma); + put_device(&sch->dev); + put_device(&cdev->dev); kfree(io_priv); } diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index be2520cc010bef..7dc72cb718b0e2 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -71,15 +71,11 @@ static int vfio_ap_queue_dev_probe(struct ap_device *apdev) static void vfio_ap_queue_dev_remove(struct ap_device *apdev) { struct vfio_ap_queue *q; - int apid, apqi; mutex_lock(&matrix_dev->lock); q = dev_get_drvdata(&apdev->device); + vfio_ap_mdev_reset_queue(q, 1); dev_set_drvdata(&apdev->device, NULL); - apid = AP_QID_CARD(q->apqn); - apqi = AP_QID_QUEUE(q->apqn); - vfio_ap_mdev_reset_queue(apid, apqi, 1); - vfio_ap_irq_disable(q); kfree(q); mutex_unlock(&matrix_dev->lock); } diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index e0bde85187451b..7ceb6c433b3ba0 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -25,6 +25,7 @@ #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev); +static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); static int match_apqn(struct device *dev, const void *data) { @@ -49,20 +50,15 @@ static struct vfio_ap_queue *vfio_ap_get_queue( int apqn) { struct vfio_ap_queue *q; - struct 
device *dev; if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm)) return NULL; if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm)) return NULL; - dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL, - &apqn, match_apqn); - if (!dev) - return NULL; - q = dev_get_drvdata(dev); - q->matrix_mdev = matrix_mdev; - put_device(dev); + q = vfio_ap_find_queue(apqn); + if (q) + q->matrix_mdev = matrix_mdev; return q; } @@ -119,13 +115,18 @@ static void vfio_ap_wait_for_irqclear(int apqn) */ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) { - if (q->saved_isc != VFIO_AP_ISC_INVALID && q->matrix_mdev) + if (!q) + return; + if (q->saved_isc != VFIO_AP_ISC_INVALID && + !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) { kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc); - if (q->saved_pfn && q->matrix_mdev) + q->saved_isc = VFIO_AP_ISC_INVALID; + } + if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) { vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &q->saved_pfn, 1); - q->saved_pfn = 0; - q->saved_isc = VFIO_AP_ISC_INVALID; + q->saved_pfn = 0; + } } /** @@ -144,7 +145,7 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) * Returns if ap_aqic function failed with invalid, deconfigured or * checkstopped AP. 
*/ -struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) +static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) { struct ap_qirq_ctrl aqic_gisa = {}; struct ap_queue_status status; @@ -1114,48 +1115,70 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static void vfio_ap_irq_disable_apqn(int apqn) +static struct vfio_ap_queue *vfio_ap_find_queue(int apqn) { struct device *dev; - struct vfio_ap_queue *q; + struct vfio_ap_queue *q = NULL; dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL, &apqn, match_apqn); if (dev) { q = dev_get_drvdata(dev); - vfio_ap_irq_disable(q); put_device(dev); } + + return q; } -int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, +int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, unsigned int retry) { struct ap_queue_status status; + int ret; int retry2 = 2; - int apqn = AP_MKQID(apid, apqi); - do { - status = ap_zapq(apqn); - switch (status.response_code) { - case AP_RESPONSE_NORMAL: - while (!status.queue_empty && retry2--) { - msleep(20); - status = ap_tapq(apqn, NULL); - } - WARN_ON_ONCE(retry2 <= 0); - return 0; - case AP_RESPONSE_RESET_IN_PROGRESS: - case AP_RESPONSE_BUSY: + if (!q) + return 0; + +retry_zapq: + status = ap_zapq(q->apqn); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + ret = 0; + break; + case AP_RESPONSE_RESET_IN_PROGRESS: + if (retry--) { msleep(20); - break; - default: - /* things are really broken, give up */ - return -EIO; + goto retry_zapq; } - } while (retry--); + ret = -EBUSY; + break; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + WARN_ON_ONCE(status.irq_enabled); + ret = -EBUSY; + goto free_resources; + default: + /* things are really broken, give up */ + WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n", + status.response_code); + return -EIO; + } + + /* wait for the reset to take effect */ + while (retry2--) { + if 
(status.queue_empty && !status.irq_enabled) + break; + msleep(20); + status = ap_tapq(q->apqn, NULL); + } + WARN_ON_ONCE(retry2 <= 0); - return -EBUSY; +free_resources: + vfio_ap_free_aqic_resources(q); + + return ret; } static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) @@ -1163,13 +1186,15 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) int ret; int rc = 0; unsigned long apid, apqi; + struct vfio_ap_queue *q; struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, matrix_mdev->matrix.apm_max + 1) { for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, matrix_mdev->matrix.aqm_max + 1) { - ret = vfio_ap_mdev_reset_queue(apid, apqi, 1); + q = vfio_ap_find_queue(AP_MKQID(apid, apqi)); + ret = vfio_ap_mdev_reset_queue(q, 1); /* * Regardless whether a queue turns out to be busy, or * is not operational, we need to continue resetting @@ -1177,7 +1202,6 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) */ if (ret) rc = ret; - vfio_ap_irq_disable_apqn(AP_MKQID(apid, apqi)); } } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index f46dde56b4644f..28e9d998976820 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -88,11 +88,6 @@ struct ap_matrix_mdev { struct mdev_device *mdev; }; -extern int vfio_ap_mdev_register(void); -extern void vfio_ap_mdev_unregister(void); -int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, - unsigned int retry); - struct vfio_ap_queue { struct ap_matrix_mdev *matrix_mdev; unsigned long saved_pfn; @@ -100,5 +95,10 @@ struct vfio_ap_queue { #define VFIO_AP_ISC_INVALID 0xff unsigned char saved_isc; }; -struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q); + +int vfio_ap_mdev_register(void); +void vfio_ap_mdev_unregister(void); +int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, + unsigned int retry); + #endif /* 
_VFIO_AP_PRIVATE_H_ */ diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index b235393e091caf..2f7e06ec9a30ec 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -1075,7 +1075,8 @@ struct qeth_card *qeth_get_card_by_busid(char *bus_id); void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask); int qeth_threads_running(struct qeth_card *, unsigned long); -int qeth_set_offline(struct qeth_card *card, bool resetting); +int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, + bool resetting); int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *, int (*reply_cb) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index e27319de7b00be..f108232498bafb 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5300,12 +5300,12 @@ static int qeth_hardsetup_card(struct qeth_card *card, bool *carrier_ok) return rc; } -static int qeth_set_online(struct qeth_card *card) +static int qeth_set_online(struct qeth_card *card, + const struct qeth_discipline *disc) { bool carrier_ok; int rc; - mutex_lock(&card->discipline_mutex); mutex_lock(&card->conf_mutex); QETH_CARD_TEXT(card, 2, "setonlin"); @@ -5322,7 +5322,7 @@ static int qeth_set_online(struct qeth_card *card) /* no need for locking / error handling at this early stage: */ qeth_set_real_num_tx_queues(card, qeth_tx_actual_queues(card)); - rc = card->discipline->set_online(card, carrier_ok); + rc = disc->set_online(card, carrier_ok); if (rc) goto err_online; @@ -5330,7 +5330,6 @@ static int qeth_set_online(struct qeth_card *card) kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return 0; err_online: @@ -5345,15 +5344,14 @@ static int qeth_set_online(struct qeth_card *card) qdio_free(CARD_DDEV(card)); mutex_unlock(&card->conf_mutex); - 
mutex_unlock(&card->discipline_mutex); return rc; } -int qeth_set_offline(struct qeth_card *card, bool resetting) +int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, + bool resetting) { int rc, rc2, rc3; - mutex_lock(&card->discipline_mutex); mutex_lock(&card->conf_mutex); QETH_CARD_TEXT(card, 3, "setoffl"); @@ -5374,7 +5372,7 @@ int qeth_set_offline(struct qeth_card *card, bool resetting) cancel_work_sync(&card->rx_mode_work); - card->discipline->set_offline(card); + disc->set_offline(card); qeth_qdio_clear_card(card, 0); qeth_drain_output_queues(card); @@ -5395,16 +5393,19 @@ int qeth_set_offline(struct qeth_card *card, bool resetting) kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return 0; } EXPORT_SYMBOL_GPL(qeth_set_offline); static int qeth_do_reset(void *data) { + const struct qeth_discipline *disc; struct qeth_card *card = data; int rc; + /* Lock-free, other users will block until we are done. */ + disc = card->discipline; + QETH_CARD_TEXT(card, 2, "recover1"); if (!qeth_do_run_thread(card, QETH_RECOVER_THREAD)) return 0; @@ -5412,8 +5413,8 @@ static int qeth_do_reset(void *data) dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); - qeth_set_offline(card, true); - rc = qeth_set_online(card); + qeth_set_offline(card, disc, true); + rc = qeth_set_online(card, disc); if (!rc) { dev_info(&card->gdev->dev, "Device successfully recovered!\n"); @@ -6360,6 +6361,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) break; default: card->info.layer_enforced = true; + /* It's so early that we don't need the discipline_mutex yet. 
*/ rc = qeth_core_load_discipline(card, enforced_disc); if (rc) goto err_load; @@ -6392,10 +6394,12 @@ static void qeth_core_remove_device(struct ccwgroup_device *gdev) QETH_CARD_TEXT(card, 2, "removedv"); + mutex_lock(&card->discipline_mutex); if (card->discipline) { card->discipline->remove(gdev); qeth_core_free_discipline(card); } + mutex_unlock(&card->discipline_mutex); qeth_free_qdio_queues(card); @@ -6410,6 +6414,7 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) int rc = 0; enum qeth_discipline_id def_discipline; + mutex_lock(&card->discipline_mutex); if (!card->discipline) { def_discipline = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 : QETH_DISCIPLINE_LAYER2; @@ -6423,16 +6428,23 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) } } - rc = qeth_set_online(card); + rc = qeth_set_online(card, card->discipline); + err: + mutex_unlock(&card->discipline_mutex); return rc; } static int qeth_core_set_offline(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; - return qeth_set_offline(card, false); + mutex_lock(&card->discipline_mutex); + rc = qeth_set_offline(card, card->discipline, false); + mutex_unlock(&card->discipline_mutex); + + return rc; } static void qeth_core_shutdown(struct ccwgroup_device *gdev) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 79939ba5d52356..cfc931f2b7e2ca 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2208,7 +2208,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); if (gdev->state == CCWGROUP_ONLINE) - qeth_set_offline(card, false); + qeth_set_offline(card, card->discipline, false); cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index b1c1d2510d55bf..291861c9b95698 100644 --- 
a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1816,7 +1816,7 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { - if (qeth_get_ip_version(skb) != 4) + if (vlan_get_protocol(skb) != htons(ETH_P_IP)) features &= ~NETIF_F_HW_VLAN_CTAG_TX; return qeth_features_check(skb, dev, features); } @@ -1974,7 +1974,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); if (cgdev->state == CCWGROUP_ONLINE) - qeth_set_offline(card, false); + qeth_set_offline(card, card->discipline, false); cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index e3e157a7498808..1b1da162f5f6be 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -25,6 +25,7 @@ #include #include #include +#include #include /* ssleep prototype */ #include #include @@ -226,6 +227,12 @@ static int open_getadapter_fib(struct aac_dev * dev, void __user *arg) return status; } +struct compat_fib_ioctl { + u32 fibctx; + s32 wait; + compat_uptr_t fib; +}; + /** * next_getadapter_fib - get the next fib * @dev: adapter to use @@ -243,8 +250,19 @@ static int next_getadapter_fib(struct aac_dev * dev, void __user *arg) struct list_head * entry; unsigned long flags; - if(copy_from_user((void *)&f, arg, sizeof(struct fib_ioctl))) - return -EFAULT; + if (in_compat_syscall()) { + struct compat_fib_ioctl cf; + + if (copy_from_user(&cf, arg, sizeof(struct compat_fib_ioctl))) + return -EFAULT; + + f.fibctx = cf.fibctx; + f.wait = cf.wait; + f.fib = compat_ptr(cf.fib); + } else { + if (copy_from_user(&f, arg, sizeof(struct fib_ioctl))) + return -EFAULT; + } /* * Verify that the HANDLE passed in was a valid AdapterFibContext * diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c 
index 8f3772480582c1..0a82afaf402859 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1182,63 +1182,6 @@ static long aac_cfg_ioctl(struct file *file, return aac_do_ioctl(aac, cmd, (void __user *)arg); } -#ifdef CONFIG_COMPAT -static long aac_compat_do_ioctl(struct aac_dev *dev, unsigned cmd, unsigned long arg) -{ - long ret; - switch (cmd) { - case FSACTL_MINIPORT_REV_CHECK: - case FSACTL_SENDFIB: - case FSACTL_OPEN_GET_ADAPTER_FIB: - case FSACTL_CLOSE_GET_ADAPTER_FIB: - case FSACTL_SEND_RAW_SRB: - case FSACTL_GET_PCI_INFO: - case FSACTL_QUERY_DISK: - case FSACTL_DELETE_DISK: - case FSACTL_FORCE_DELETE_DISK: - case FSACTL_GET_CONTAINERS: - case FSACTL_SEND_LARGE_FIB: - ret = aac_do_ioctl(dev, cmd, (void __user *)arg); - break; - - case FSACTL_GET_NEXT_ADAPTER_FIB: { - struct fib_ioctl __user *f; - - f = compat_alloc_user_space(sizeof(*f)); - ret = 0; - if (clear_user(f, sizeof(*f))) - ret = -EFAULT; - if (copy_in_user(f, (void __user *)arg, sizeof(struct fib_ioctl) - sizeof(u32))) - ret = -EFAULT; - if (!ret) - ret = aac_do_ioctl(dev, cmd, f); - break; - } - - default: - ret = -ENOIOCTLCMD; - break; - } - return ret; -} - -static int aac_compat_ioctl(struct scsi_device *sdev, unsigned int cmd, - void __user *arg) -{ - struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata; - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - return aac_compat_do_ioctl(dev, cmd, (unsigned long)arg); -} - -static long aac_compat_cfg_ioctl(struct file *file, unsigned cmd, unsigned long arg) -{ - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - return aac_compat_do_ioctl(file->private_data, cmd, arg); -} -#endif - static ssize_t aac_show_model(struct device *device, struct device_attribute *attr, char *buf) { @@ -1523,7 +1466,7 @@ static const struct file_operations aac_cfg_fops = { .owner = THIS_MODULE, .unlocked_ioctl = aac_cfg_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = aac_compat_cfg_ioctl, + .compat_ioctl = aac_cfg_ioctl, #endif .open = 
aac_cfg_open, .llseek = noop_llseek, @@ -1536,7 +1479,7 @@ static struct scsi_host_template aac_driver_template = { .info = aac_info, .ioctl = aac_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = aac_compat_ioctl, + .compat_ioctl = aac_ioctl, #endif .queuecommand = aac_queuecommand, .bios_param = aac_biosparm, diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig index b206e266b4e726..8b0deece9758b8 100644 --- a/drivers/scsi/cxgbi/cxgb4i/Kconfig +++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig @@ -4,6 +4,7 @@ config SCSI_CXGB4_ISCSI depends on PCI && INET && (IPV6 || IPV6=n) depends on THERMAL || !THERMAL depends on ETHERNET + depends on TLS || TLS=n select NET_VENDOR_CHELSIO select CHELSIO_T4 select CHELSIO_LIB diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index 5f8a7ef8f6a8e1..4f7befb43d6040 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -740,6 +740,7 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) for (i = 0; i < FNIC_IO_LOCKS; i++) spin_lock_init(&fnic->io_req_lock[i]); + err = -ENOMEM; fnic->io_req_pool = mempool_create_slab_pool(2, fnic_io_req_cache); if (!fnic->io_req_pool) goto err_out_free_resources; diff --git a/drivers/scsi/fnic/vnic_dev.c b/drivers/scsi/fnic/vnic_dev.c index a2beee6e09f06b..5988c300cc82ed 100644 --- a/drivers/scsi/fnic/vnic_dev.c +++ b/drivers/scsi/fnic/vnic_dev.c @@ -444,7 +444,8 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev) fetch_index = ioread32(&vdev->devcmd2->wq.ctrl->fetch_index); if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone */ pr_err("error in devcmd2 init"); - return -ENODEV; + err = -ENODEV; + goto err_free_wq; } /* @@ -460,7 +461,7 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev) err = vnic_dev_alloc_desc_ring(vdev, &vdev->devcmd2->results_ring, DEVCMD2_RING_SIZE, DEVCMD2_DESC_SIZE); if (err) - goto err_free_wq; + goto err_disable_wq; vdev->devcmd2->result = (struct 
devcmd2_result *) vdev->devcmd2->results_ring.descs; @@ -481,8 +482,9 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev) err_free_desc_ring: vnic_dev_free_desc_ring(vdev, &vdev->devcmd2->results_ring); -err_free_wq: +err_disable_wq: vnic_wq_disable(&vdev->devcmd2->wq); +err_free_wq: vnic_wq_free(&vdev->devcmd2->wq); err_free_devcmd2: kfree(vdev->devcmd2); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 960de375ce699d..2cbd8a524edabc 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2409,8 +2409,7 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba) DRV_NAME " phy", hisi_hba); if (rc) { dev_err(dev, "could not request phy interrupt, rc=%d\n", rc); - rc = -ENOENT; - goto free_irq_vectors; + return -ENOENT; } rc = devm_request_irq(dev, pci_irq_vector(pdev, 2), @@ -2418,8 +2417,7 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba) DRV_NAME " channel", hisi_hba); if (rc) { dev_err(dev, "could not request chnl interrupt, rc=%d\n", rc); - rc = -ENOENT; - goto free_irq_vectors; + return -ENOENT; } rc = devm_request_irq(dev, pci_irq_vector(pdev, 11), @@ -2427,8 +2425,7 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba) DRV_NAME " fatal", hisi_hba); if (rc) { dev_err(dev, "could not request fatal interrupt, rc=%d\n", rc); - rc = -ENOENT; - goto free_irq_vectors; + return -ENOENT; } if (hisi_sas_intr_conv) @@ -2449,8 +2446,7 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba) if (rc) { dev_err(dev, "could not request cq%d interrupt, rc=%d\n", i, rc); - rc = -ENOENT; - goto free_irq_vectors; + return -ENOENT; } cq->irq_mask = pci_irq_get_affinity(pdev, i + BASE_VECTORS_V3_HW); if (!cq->irq_mask) { @@ -2460,10 +2456,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba) } return 0; - -free_irq_vectors: - pci_free_irq_vectors(pdev); - return rc; } static int hisi_sas_v3_init(struct hisi_hba *hisi_hba) @@ -3317,11 +3309,11 
@@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) rc = interrupt_preinit_v3_hw(hisi_hba); if (rc) - goto err_out_ha; + goto err_out_debugfs; dev_err(dev, "%d hw queues\n", shost->nr_hw_queues); rc = scsi_add_host(shost, dev); if (rc) - goto err_out_ha; + goto err_out_free_irq_vectors; rc = sas_register_ha(sha); if (rc) @@ -3348,8 +3340,12 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_out_register_ha: scsi_remove_host(shost); -err_out_ha: +err_out_free_irq_vectors: + pci_free_irq_vectors(pdev); +err_out_debugfs: hisi_sas_debugfs_exit(hisi_hba); +err_out_ha: + hisi_sas_free(hisi_hba); scsi_host_put(shost); err_out_regions: pci_release_regions(pdev); diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 070cf516b98feb..57c9a71fa33a7c 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -2957,8 +2957,10 @@ static int ibmvfc_slave_configure(struct scsi_device *sdev) unsigned long flags = 0; spin_lock_irqsave(shost->host_lock, flags); - if (sdev->type == TYPE_DISK) + if (sdev->type == TYPE_DISK) { sdev->allow_restart = 1; + blk_queue_rq_timeout(sdev->request_queue, 120 * HZ); + } spin_unlock_irqrestore(shost->host_lock, flags); return 0; } diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 96a2952cf626bf..a50f1eef0e0cdb 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -1624,8 +1624,13 @@ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) rc = fc_exch_done_locked(ep); WARN_ON(fc_seq_exch(sp) != ep); spin_unlock_bh(&ep->ex_lock); - if (!rc) + if (!rc) { fc_exch_delete(ep); + } else { + FC_EXCH_DBG(ep, "ep is completed already," + "hence skip calling the resp\n"); + goto skip_resp; + } } /* @@ -1644,6 +1649,7 @@ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) if (!fc_invoke_resp(ep, sp, fp)) fc_frame_free(fp); +skip_resp: fc_exch_release(ep); 
return; rel: @@ -1900,10 +1906,16 @@ static void fc_exch_reset(struct fc_exch *ep) fc_exch_hold(ep); - if (!rc) + if (!rc) { fc_exch_delete(ep); + } else { + FC_EXCH_DBG(ep, "ep is completed already," + "hence skip calling the resp\n"); + goto skip_resp; + } fc_invoke_resp(ep, sp, ERR_PTR(-FC_EX_CLOSED)); +skip_resp: fc_seq_set_resp(sp, NULL, ep->arg); fc_exch_release(ep); } diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 549adfaa97ce5a..93e507677bdcb3 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -753,7 +753,7 @@ struct lpfc_hba { #define HBA_SP_QUEUE_EVT 0x8 /* Slow-path qevt posted to worker thread*/ #define HBA_POST_RECEIVE_BUFFER 0x10 /* Rcv buffers need to be posted */ #define HBA_PERSISTENT_TOPO 0x20 /* Persistent topology support in hba */ -#define ELS_XRI_ABORT_EVENT 0x40 +#define ELS_XRI_ABORT_EVENT 0x40 /* ELS_XRI abort event was queued */ #define ASYNC_EVENT 0x80 #define LINK_DISABLED 0x100 /* Link disabled by user */ #define FCF_TS_INPROG 0x200 /* FCF table scan in progress */ diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 482e4a888daeca..1437e44ade8011 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -41,6 +41,7 @@ enum lpfc_work_type { LPFC_EVT_DEV_LOSS, LPFC_EVT_FASTPATH_MGMT_EVT, LPFC_EVT_RESET_HBA, + LPFC_EVT_RECOVER_PORT }; /* structure used to queue event to the discovery tasklet */ @@ -128,6 +129,7 @@ struct lpfc_nodelist { struct lpfc_vport *vport; struct lpfc_work_evt els_retry_evt; struct lpfc_work_evt dev_loss_evt; + struct lpfc_work_evt recovery_evt; struct kref kref; atomic_t cmd_pending; uint32_t cmd_qdepth; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index bb02fd8bc2ddf2..9746d2f4fcfadc 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -552,6 +552,15 @@ lpfc_work_list_done(struct lpfc_hba *phba) fcf_inuse, nlp_did); break; + case 
LPFC_EVT_RECOVER_PORT: + ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); + lpfc_sli_abts_recover_port(ndlp->vport, ndlp); + free_evt = 0; + /* decrement the node reference count held for + * this queued work + */ + lpfc_nlp_put(ndlp); + break; case LPFC_EVT_ONLINE: if (phba->link_state < LPFC_LINK_DOWN) *(int *) (evtp->evt_arg1) = lpfc_online(phba); @@ -4515,6 +4524,8 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp); INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp); timer_setup(&ndlp->nlp_delayfunc, lpfc_els_retry_delay, 0); + INIT_LIST_HEAD(&ndlp->recovery_evt.evt_listp); + ndlp->nlp_DID = did; ndlp->vport = vport; ndlp->phba = vport->phba; @@ -5011,6 +5022,29 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) mempool_free(mbox, phba->mbox_mem_pool); acc_plogi = 1; } + } else { + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "1444 Failed to allocate mempool " + "unreg_rpi UNREG x%x, " + "DID x%x, flag x%x, " + "ndlp x%px\n", + ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp); + + /* Because mempool_alloc failed, we + * will issue a LOGO here and keep the rpi alive if + * not unloading. 
+ */ + if (!(vport->load_flag & FC_UNLOADING)) { + ndlp->nlp_flag &= ~NLP_UNREG_INP; + lpfc_issue_els_logo(vport, ndlp, 0); + ndlp->nlp_prev_state = ndlp->nlp_state; + lpfc_nlp_set_state(vport, ndlp, + NLP_STE_NPR_NODE); + } + + return 1; } lpfc_no_rpi(phba, ndlp); out: @@ -5214,6 +5248,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) list_del_init(&ndlp->els_retry_evt.evt_listp); list_del_init(&ndlp->dev_loss_evt.evt_listp); + list_del_init(&ndlp->recovery_evt.evt_listp); lpfc_cleanup_vports_rrqs(vport, ndlp); if (phba->sli_rev == LPFC_SLI_REV4) ndlp->nlp_flag |= NLP_RELEASE_RPI; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index ca25e54bb78249..40fe889033d43a 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5958,18 +5958,21 @@ lpfc_sli4_async_grp5_evt(struct lpfc_hba *phba, void lpfc_sli4_async_event_proc(struct lpfc_hba *phba) { struct lpfc_cq_event *cq_event; + unsigned long iflags; /* First, declare the async event has been handled */ - spin_lock_irq(&phba->hbalock); + spin_lock_irqsave(&phba->hbalock, iflags); phba->hba_flag &= ~ASYNC_EVENT; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irqrestore(&phba->hbalock, iflags); + /* Now, handle all the async events */ + spin_lock_irqsave(&phba->sli4_hba.asynce_list_lock, iflags); while (!list_empty(&phba->sli4_hba.sp_asynce_work_queue)) { - /* Get the first event from the head of the event queue */ - spin_lock_irq(&phba->hbalock); list_remove_head(&phba->sli4_hba.sp_asynce_work_queue, cq_event, struct lpfc_cq_event, list); - spin_unlock_irq(&phba->hbalock); + spin_unlock_irqrestore(&phba->sli4_hba.asynce_list_lock, + iflags); + /* Process the asynchronous event */ switch (bf_get(lpfc_trailer_code, &cq_event->cqe.mcqe_cmpl)) { case LPFC_TRAILER_CODE_LINK: @@ -6001,9 +6004,12 @@ void lpfc_sli4_async_event_proc(struct lpfc_hba *phba) &cq_event->cqe.mcqe_cmpl)); break; } + /* Free the completion event processed to the free 
pool */ lpfc_sli4_cq_event_release(phba, cq_event); + spin_lock_irqsave(&phba->sli4_hba.asynce_list_lock, iflags); } + spin_unlock_irqrestore(&phba->sli4_hba.asynce_list_lock, iflags); } /** @@ -6630,6 +6636,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* This abort list used by worker thread */ spin_lock_init(&phba->sli4_hba.sgl_list_lock); spin_lock_init(&phba->sli4_hba.nvmet_io_wait_lock); + spin_lock_init(&phba->sli4_hba.asynce_list_lock); + spin_lock_init(&phba->sli4_hba.els_xri_abrt_list_lock); /* * Initialize driver internal slow-path work queues @@ -6641,8 +6649,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.sp_queue_event); /* Asynchronous event CQ Event work queue list */ INIT_LIST_HEAD(&phba->sli4_hba.sp_asynce_work_queue); - /* Fast-path XRI aborted CQ Event work queue list */ - INIT_LIST_HEAD(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue); /* Slow-path XRI aborted CQ Event work queue list */ INIT_LIST_HEAD(&phba->sli4_hba.sp_els_xri_aborted_work_queue); /* Receive queue CQ Event work queue list */ @@ -10174,26 +10180,28 @@ lpfc_sli4_cq_event_release(struct lpfc_hba *phba, static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba) { - LIST_HEAD(cqelist); - struct lpfc_cq_event *cqe; + LIST_HEAD(cq_event_list); + struct lpfc_cq_event *cq_event; unsigned long iflags; /* Retrieve all the pending WCQEs from pending WCQE lists */ - spin_lock_irqsave(&phba->hbalock, iflags); - /* Pending FCP XRI abort events */ - list_splice_init(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue, - &cqelist); + /* Pending ELS XRI abort events */ + spin_lock_irqsave(&phba->sli4_hba.els_xri_abrt_list_lock, iflags); list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue, - &cqelist); + &cq_event_list); + spin_unlock_irqrestore(&phba->sli4_hba.els_xri_abrt_list_lock, iflags); + /* Pending asynnc events */ + spin_lock_irqsave(&phba->sli4_hba.asynce_list_lock, iflags); 
list_splice_init(&phba->sli4_hba.sp_asynce_work_queue, - &cqelist); - spin_unlock_irqrestore(&phba->hbalock, iflags); + &cq_event_list); + spin_unlock_irqrestore(&phba->sli4_hba.asynce_list_lock, iflags); - while (!list_empty(&cqelist)) { - list_remove_head(&cqelist, cqe, struct lpfc_cq_event, list); - lpfc_sli4_cq_event_release(phba, cqe); + while (!list_empty(&cq_event_list)) { + list_remove_head(&cq_event_list, cq_event, + struct lpfc_cq_event, list); + lpfc_sli4_cq_event_release(phba, cq_event); } } diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 27ff67e9edae72..be54fbf5146f1a 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -46,6 +46,7 @@ #define LPFC_MEM_POOL_SIZE 64 /* max elem in non-DMA safety pool */ #define LPFC_DEVICE_DATA_POOL_SIZE 64 /* max elements in device data pool */ #define LPFC_RRQ_POOL_SIZE 256 /* max elements in non-DMA pool */ +#define LPFC_MBX_POOL_SIZE 256 /* max elements in MBX non-DMA pool */ int lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *phba) { @@ -111,8 +112,8 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align) pool->current_count++; } - phba->mbox_mem_pool = mempool_create_kmalloc_pool(LPFC_MEM_POOL_SIZE, - sizeof(LPFC_MBOXQ_t)); + phba->mbox_mem_pool = mempool_create_kmalloc_pool(LPFC_MBX_POOL_SIZE, + sizeof(LPFC_MBOXQ_t)); if (!phba->mbox_mem_pool) goto fail_free_mbuf_pool; @@ -588,8 +589,6 @@ lpfc_sli4_rb_free(struct lpfc_hba *phba, struct hbq_dmabuf *dmab) * Description: Allocates a DMA-mapped receive buffer from the lpfc_hrb_pool PCI * pool along a non-DMA-mapped container for it. * - * Notes: Not interrupt-safe. Must be called with no locks held. 
- * * Returns: * pointer to HBQ on success * NULL on failure @@ -599,7 +598,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) { struct rqb_dmabuf *dma_buf; - dma_buf = kzalloc(sizeof(struct rqb_dmabuf), GFP_KERNEL); + dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL); if (!dma_buf) return NULL; @@ -722,7 +721,6 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) drqe.address_hi = putPaddrHigh(rqb_entry->dbuf.phys); rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe); if (rc < 0) { - (rqbp->rqb_free_buffer)(phba, rqb_entry); lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6409 Cannot post to HRQ %d: %x %x %x " "DRQ %x %x\n", @@ -732,6 +730,7 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) rqb_entry->hrq->entry_count, rqb_entry->drq->host_index, rqb_entry->drq->hba_index); + (rqbp->rqb_free_buffer)(phba, rqb_entry); } else { list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list); rqbp->buffer_count++; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 0c39ed50998c86..69f1a0457f51ef 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2280,6 +2280,8 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, int ret, i, pending = 0; struct lpfc_sli_ring *pring; struct lpfc_hba *phba = vport->phba; + struct lpfc_sli4_hdw_queue *qp; + int abts_scsi, abts_nvme; /* Host transport has to clean up and confirm requiring an indefinite * wait. 
Print a message if a 10 second wait expires and renew the @@ -2290,17 +2292,23 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, ret = wait_for_completion_timeout(lport_unreg_cmp, wait_tmo); if (unlikely(!ret)) { pending = 0; + abts_scsi = 0; + abts_nvme = 0; for (i = 0; i < phba->cfg_hdw_queue; i++) { - pring = phba->sli4_hba.hdwq[i].io_wq->pring; + qp = &phba->sli4_hba.hdwq[i]; + pring = qp->io_wq->pring; if (!pring) continue; - if (pring->txcmplq_cnt) - pending += pring->txcmplq_cnt; + pending += pring->txcmplq_cnt; + abts_scsi += qp->abts_scsi_io_bufs; + abts_nvme += qp->abts_nvme_io_bufs; } lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, "6176 Lport x%px Localport x%px wait " - "timed out. Pending %d. Renewing.\n", - lport, vport->localport, pending); + "timed out. Pending %d [%d:%d]. " + "Renewing.\n", + lport, vport->localport, pending, + abts_scsi, abts_nvme); continue; } break; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index e158cd77d387f4..f103340820c668 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -7248,12 +7248,16 @@ lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, struct rqb_dmabuf *rqb_buffer; LIST_HEAD(rqb_buf_list); - spin_lock_irqsave(&phba->hbalock, flags); rqbp = hrq->rqbp; for (i = 0; i < count; i++) { + spin_lock_irqsave(&phba->hbalock, flags); /* IF RQ is already full, don't bother */ - if (rqbp->buffer_count + i >= rqbp->entry_count - 1) + if (rqbp->buffer_count + i >= rqbp->entry_count - 1) { + spin_unlock_irqrestore(&phba->hbalock, flags); break; + } + spin_unlock_irqrestore(&phba->hbalock, flags); + rqb_buffer = rqbp->rqb_alloc_buffer(phba); if (!rqb_buffer) break; @@ -7262,6 +7266,8 @@ lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, rqb_buffer->idx = idx; list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list); } + + spin_lock_irqsave(&phba->hbalock, flags); while (!list_empty(&rqb_buf_list)) { list_remove_head(&rqb_buf_list, 
rqb_buffer, struct rqb_dmabuf, hbuf.list); @@ -10364,6 +10370,32 @@ lpfc_extra_ring_setup( struct lpfc_hba *phba) return 0; } +static void +lpfc_sli_post_recovery_event(struct lpfc_hba *phba, + struct lpfc_nodelist *ndlp) +{ + unsigned long iflags; + struct lpfc_work_evt *evtp = &ndlp->recovery_evt; + + spin_lock_irqsave(&phba->hbalock, iflags); + if (!list_empty(&evtp->evt_listp)) { + spin_unlock_irqrestore(&phba->hbalock, iflags); + return; + } + + /* Incrementing the reference count until the queued work is done. */ + evtp->evt_arg1 = lpfc_nlp_get(ndlp); + if (!evtp->evt_arg1) { + spin_unlock_irqrestore(&phba->hbalock, iflags); + return; + } + evtp->evt = LPFC_EVT_RECOVER_PORT; + list_add_tail(&evtp->evt_listp, &phba->work_list); + spin_unlock_irqrestore(&phba->hbalock, iflags); + + lpfc_worker_wake_up(phba); +} + /* lpfc_sli_abts_err_handler - handle a failed ABTS request from an SLI3 port. * @phba: Pointer to HBA context object. * @iocbq: Pointer to iocb object. @@ -10427,7 +10459,6 @@ lpfc_sli4_abts_err_handler(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, struct sli4_wcqe_xri_aborted *axri) { - struct lpfc_vport *vport; uint32_t ext_status = 0; if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { @@ -10437,7 +10468,6 @@ lpfc_sli4_abts_err_handler(struct lpfc_hba *phba, return; } - vport = ndlp->vport; lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, "3116 Port generated FCP XRI ABORT event on " "vpi %d rpi %d xri x%x status 0x%x parameter x%x\n", @@ -10454,7 +10484,7 @@ lpfc_sli4_abts_err_handler(struct lpfc_hba *phba, ext_status = axri->parameter & IOERR_PARAM_MASK; if ((bf_get(lpfc_wcqe_xa_status, axri) == IOSTAT_LOCAL_REJECT) && ((ext_status == IOERR_SEQUENCE_TIMEOUT) || (ext_status == 0))) - lpfc_sli_abts_recover_port(vport, ndlp); + lpfc_sli_post_recovery_event(phba, ndlp); } /** @@ -13062,23 +13092,30 @@ lpfc_sli_intr_handler(int irq, void *dev_id) void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba) { struct lpfc_cq_event *cq_event; + unsigned long 
iflags; /* First, declare the els xri abort event has been handled */ - spin_lock_irq(&phba->hbalock); + spin_lock_irqsave(&phba->hbalock, iflags); phba->hba_flag &= ~ELS_XRI_ABORT_EVENT; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irqrestore(&phba->hbalock, iflags); + /* Now, handle all the els xri abort events */ + spin_lock_irqsave(&phba->sli4_hba.els_xri_abrt_list_lock, iflags); while (!list_empty(&phba->sli4_hba.sp_els_xri_aborted_work_queue)) { /* Get the first event from the head of the event queue */ - spin_lock_irq(&phba->hbalock); list_remove_head(&phba->sli4_hba.sp_els_xri_aborted_work_queue, cq_event, struct lpfc_cq_event, list); - spin_unlock_irq(&phba->hbalock); + spin_unlock_irqrestore(&phba->sli4_hba.els_xri_abrt_list_lock, + iflags); /* Notify aborted XRI for ELS work queue */ lpfc_sli4_els_xri_aborted(phba, &cq_event->cqe.wcqe_axri); + /* Free the event processed back to the free pool */ lpfc_sli4_cq_event_release(phba, cq_event); + spin_lock_irqsave(&phba->sli4_hba.els_xri_abrt_list_lock, + iflags); } + spin_unlock_irqrestore(&phba->sli4_hba.els_xri_abrt_list_lock, iflags); } /** @@ -13289,9 +13326,13 @@ lpfc_sli4_sp_handle_async_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe) cq_event = lpfc_cq_event_setup(phba, mcqe, sizeof(struct lpfc_mcqe)); if (!cq_event) return false; - spin_lock_irqsave(&phba->hbalock, iflags); + + spin_lock_irqsave(&phba->sli4_hba.asynce_list_lock, iflags); list_add_tail(&cq_event->list, &phba->sli4_hba.sp_asynce_work_queue); + spin_unlock_irqrestore(&phba->sli4_hba.asynce_list_lock, iflags); + /* Set the async event flag */ + spin_lock_irqsave(&phba->hbalock, iflags); phba->hba_flag |= ASYNC_EVENT; spin_unlock_irqrestore(&phba->hbalock, iflags); @@ -13566,17 +13607,20 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba, break; case LPFC_NVME_LS: /* NVME LS uses ELS resources */ case LPFC_ELS: - cq_event = lpfc_cq_event_setup( - phba, wcqe, sizeof(struct sli4_wcqe_xri_aborted)); - if (!cq_event) - return 
false; + cq_event = lpfc_cq_event_setup(phba, wcqe, sizeof(*wcqe)); + if (!cq_event) { + workposted = false; + break; + } cq_event->hdwq = cq->hdwq; - spin_lock_irqsave(&phba->hbalock, iflags); + spin_lock_irqsave(&phba->sli4_hba.els_xri_abrt_list_lock, + iflags); list_add_tail(&cq_event->list, &phba->sli4_hba.sp_els_xri_aborted_work_queue); /* Set the els xri abort event flag */ phba->hba_flag |= ELS_XRI_ABORT_EVENT; - spin_unlock_irqrestore(&phba->hbalock, iflags); + spin_unlock_irqrestore(&phba->sli4_hba.els_xri_abrt_list_lock, + iflags); workposted = true; break; default: diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index a966cdeb52ee7c..100cb1a94811bc 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -920,8 +920,9 @@ struct lpfc_sli4_hba { struct list_head sp_queue_event; struct list_head sp_cqe_event_pool; struct list_head sp_asynce_work_queue; - struct list_head sp_fcp_xri_aborted_work_queue; + spinlock_t asynce_list_lock; /* protect sp_asynce_work_queue list */ struct list_head sp_els_xri_aborted_work_queue; + spinlock_t els_xri_abrt_list_lock; /* protect els_xri_aborted list */ struct list_head sp_unsol_work_queue; struct lpfc_sli4_link link_state; struct lpfc_sli4_lnk_info lnk_info; @@ -1103,8 +1104,7 @@ void lpfc_sli4_async_event_proc(struct lpfc_hba *); void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *); int lpfc_sli4_resume_rpi(struct lpfc_nodelist *, void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *); -void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *); -void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *); +void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba); void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, struct sli4_wcqe_xri_aborted *axri, struct lpfc_io_buf *lpfc_ncmd); diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index e158d3d62056ba..cc45cdac13844d 100644 --- 
a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -8095,7 +8095,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, int error = 0, i; void *sense = NULL; dma_addr_t sense_handle; - unsigned long *sense_ptr; + void *sense_ptr; u32 opcode = 0; int ret = DCMD_SUCCESS; @@ -8218,6 +8218,13 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, } if (ioc->sense_len) { + /* make sure the pointer is part of the frame */ + if (ioc->sense_off > + (sizeof(union megasas_frame) - sizeof(__le64))) { + error = -EINVAL; + goto out; + } + sense = dma_alloc_coherent(&instance->pdev->dev, ioc->sense_len, &sense_handle, GFP_KERNEL); if (!sense) { @@ -8225,12 +8232,9 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, goto out; } - sense_ptr = - (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off); - if (instance->consistent_mask_64bit) - *sense_ptr = cpu_to_le64(sense_handle); - else - *sense_ptr = cpu_to_le32(sense_handle); + /* always store 64 bits regardless of addressing */ + sense_ptr = (void *)cmd->frame + ioc->sense_off; + put_unaligned_le64(sense_handle, sense_ptr); } /* diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 3cf3e58b697990..2025361b36e964 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -1131,7 +1131,8 @@ static int pm8001_pci_probe(struct pci_dev *pdev, pm8001_init_sas_add(pm8001_ha); /* phy setting support for motherboard controller */ - if (pm8001_configure_phy_settings(pm8001_ha)) + rc = pm8001_configure_phy_settings(pm8001_ha); + if (rc) goto err_out_shost; pm8001_post_sas_ha_init(shost, chip); diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 7593f248afb2ca..155382ce846985 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3363,7 +3363,7 @@ hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) 
pm8001_get_attached_sas_addr(phy, phy->sas_phy.attached_sas_addr); spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags); if (pm8001_ha->flags == PM8001F_RUN_TIME) - msleep(200);/*delay a moment to wait disk to spinup*/ + mdelay(200); /* delay a moment to wait for disk to spin up */ pm8001_bytes_dmaed(pm8001_ha, phy_id); } diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 61fab01d2d5277..47ad64b0662369 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2245,7 +2245,7 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, chap_name); break; case ISCSI_BOOT_TGT_CHAP_SECRET: - rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, chap_secret); break; case ISCSI_BOOT_TGT_REV_CHAP_NAME: @@ -2253,7 +2253,7 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, mchap_name); break; case ISCSI_BOOT_TGT_REV_CHAP_SECRET: - rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, mchap_secret); break; case ISCSI_BOOT_TGT_FLAGS: @@ -2766,7 +2766,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) QEDI_ERR(&qedi->dbg_ctx, "Unable to start offload thread!\n"); rc = -ENODEV; - goto free_cid_que; + goto free_tmf_thread; } INIT_DELAYED_WORK(&qedi->recovery_work, qedi_recovery_handler); @@ -2790,6 +2790,8 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) return 0; +free_tmf_thread: + destroy_workqueue(qedi->tmf_thread); free_cid_que: qedi_release_cid_que(qedi); free_uio: diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 898c70b8ebbf6a..52e8b555bd1dc6 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1268,9 +1268,10 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) lio->u.logio.flags |= SRB_LOGIN_NVME_PRLI; ql_dbg(ql_dbg_disc, vha, 0x211b, - "Async-prli - %8phC 
hdl=%x, loopid=%x portid=%06x retries=%d %s.\n", + "Async-prli - %8phC hdl=%x, loopid=%x portid=%06x retries=%d fc4type %x priority %x %s.\n", fcport->port_name, sp->handle, fcport->loop_id, fcport->d_id.b24, - fcport->login_retry, NVME_TARGET(vha->hw, fcport) ? "nvme" : "fc"); + fcport->login_retry, fcport->fc4_type, vha->hw->fc4_type_priority, + NVME_TARGET(vha->hw, fcport) ? "nvme" : "fcp"); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { @@ -1932,26 +1933,58 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) break; } - /* - * Retry PRLI with other FC-4 type if failure occurred on dual - * FCP/NVMe port - */ - if (NVME_FCP_TARGET(ea->fcport)) { - ql_dbg(ql_dbg_disc, vha, 0x2118, - "%s %d %8phC post %s prli\n", - __func__, __LINE__, ea->fcport->port_name, - (ea->fcport->fc4_type & FS_FC4TYPE_NVME) ? - "NVMe" : "FCP"); - if (vha->hw->fc4_type_priority == FC4_PRIORITY_NVME) + ql_dbg(ql_dbg_disc, vha, 0x2118, + "%s %d %8phC priority %s, fc4type %x\n", + __func__, __LINE__, ea->fcport->port_name, + vha->hw->fc4_type_priority == FC4_PRIORITY_FCP ? + "FCP" : "NVMe", ea->fcport->fc4_type); + + if (N2N_TOPO(vha->hw)) { + if (vha->hw->fc4_type_priority == FC4_PRIORITY_NVME) { ea->fcport->fc4_type &= ~FS_FC4TYPE_NVME; - else + ea->fcport->fc4_type |= FS_FC4TYPE_FCP; + } else { ea->fcport->fc4_type &= ~FS_FC4TYPE_FCP; - } + ea->fcport->fc4_type |= FS_FC4TYPE_NVME; + } - ea->fcport->flags &= ~FCF_ASYNC_SENT; - ea->fcport->keep_nport_handle = 0; - ea->fcport->logout_on_delete = 1; - qlt_schedule_sess_for_deletion(ea->fcport); + if (ea->fcport->n2n_link_reset_cnt < 3) { + ea->fcport->n2n_link_reset_cnt++; + vha->relogin_jif = jiffies + 2 * HZ; + /* + * PRLI failed. Reset link to kick start + * state machine + */ + set_bit(N2N_LINK_RESET, &vha->dpc_flags); + } else { + ql_log(ql_log_warn, vha, 0x2119, + "%s %d %8phC Unable to reconnect\n", + __func__, __LINE__, + ea->fcport->port_name); + } + } else { + /* + * switch connect. 
login failed. Take connection down + * and allow relogin to retrigger + */ + if (NVME_FCP_TARGET(ea->fcport)) { + ql_dbg(ql_dbg_disc, vha, 0x2118, + "%s %d %8phC post %s prli\n", + __func__, __LINE__, + ea->fcport->port_name, + (ea->fcport->fc4_type & FS_FC4TYPE_NVME) + ? "NVMe" : "FCP"); + if (vha->hw->fc4_type_priority == FC4_PRIORITY_NVME) + ea->fcport->fc4_type &= ~FS_FC4TYPE_NVME; + else + ea->fcport->fc4_type &= ~FS_FC4TYPE_FCP; + } + + ea->fcport->flags &= ~FCF_ASYNC_SENT; + ea->fcport->keep_nport_handle = 0; + ea->fcport->logout_on_delete = 1; + qlt_schedule_sess_for_deletion(ea->fcport); + } break; } } diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 07afd0d8a8f3ec..d6325fb2ef73b5 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -1129,7 +1129,7 @@ qla2x00_get_fw_version(scsi_qla_host_t *vha) if (ha->flags.scm_supported_a && (ha->fw_attributes_ext[0] & FW_ATTR_EXT0_SCM_SUPPORTED)) { ha->flags.scm_supported_f = 1; - ha->sf_init_cb->flags |= BIT_13; + ha->sf_init_cb->flags |= cpu_to_le16(BIT_13); } ql_log(ql_log_info, vha, 0x11a3, "SCM in FW: %s\n", (ha->flags.scm_supported_f) ? 
"Supported" : @@ -1137,9 +1137,9 @@ qla2x00_get_fw_version(scsi_qla_host_t *vha) if (vha->flags.nvme2_enabled) { /* set BIT_15 of special feature control block for SLER */ - ha->sf_init_cb->flags |= BIT_15; + ha->sf_init_cb->flags |= cpu_to_le16(BIT_15); /* set BIT_14 of special feature control block for PI CTRL*/ - ha->sf_init_cb->flags |= BIT_14; + ha->sf_init_cb->flags |= cpu_to_le16(BIT_14); } } @@ -3998,9 +3998,6 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, fcport->scan_state = QLA_FCPORT_FOUND; fcport->n2n_flag = 1; fcport->keep_nport_handle = 1; - fcport->fc4_type = FS_FC4TYPE_FCP; - if (vha->flags.nvme_enabled) - fcport->fc4_type |= FS_FC4TYPE_NVME; if (wwn_to_u64(vha->port_name) > wwn_to_u64(fcport->port_name)) { diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index f9c8ae9d669ef0..d389f56fff54a6 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -42,7 +42,7 @@ MODULE_PARM_DESC(ql2xfulldump_on_mpifail, int ql2xenforce_iocb_limit = 1; module_param(ql2xenforce_iocb_limit, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ql2xenforce_iocb_limit, - "Enforce IOCB throttling, to avoid FW congestion. (default: 0)"); + "Enforce IOCB throttling, to avoid FW congestion. 
(default: 1)"); /* * CT6 CTX allocation cache diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index bd8623ee156a6c..26c13a953b9756 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -928,7 +928,8 @@ qla27xx_template_checksum(void *p, ulong size) static inline int qla27xx_verify_template_checksum(struct qla27xx_fwdt_template *tmp) { - return qla27xx_template_checksum(tmp, tmp->template_size) == 0; + return qla27xx_template_checksum(tmp, + le32_to_cpu(tmp->template_size)) == 0; } static inline int @@ -944,7 +945,7 @@ qla27xx_execute_fwdt_template(struct scsi_qla_host *vha, ulong len = 0; if (qla27xx_fwdt_template_valid(tmp)) { - len = tmp->template_size; + len = le32_to_cpu(tmp->template_size); tmp = memcpy(buf, tmp, len); ql27xx_edit_template(vha, tmp); qla27xx_walk_template(vha, tmp, buf, &len); @@ -960,7 +961,7 @@ qla27xx_fwdt_calculate_dump_size(struct scsi_qla_host *vha, void *p) ulong len = 0; if (qla27xx_fwdt_template_valid(tmp)) { - len = tmp->template_size; + len = le32_to_cpu(tmp->template_size); qla27xx_walk_template(vha, tmp, NULL, &len); } @@ -972,7 +973,7 @@ qla27xx_fwdt_template_size(void *p) { struct qla27xx_fwdt_template *tmp = p; - return tmp->template_size; + return le32_to_cpu(tmp->template_size); } int diff --git a/drivers/scsi/qla2xxx/qla_tmpl.h b/drivers/scsi/qla2xxx/qla_tmpl.h index c47184db50813d..6e0987edfcebce 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.h +++ b/drivers/scsi/qla2xxx/qla_tmpl.h @@ -12,7 +12,7 @@ struct __packed qla27xx_fwdt_template { __le32 template_type; __le32 entry_offset; - uint32_t template_size; + __le32 template_size; uint32_t count; /* borrow field for running/residual count */ __le32 entry_count; diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 24c0f7ec03511b..4a08c450b756f3 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -6740,7 +6740,7 @@ static int __init scsi_debug_init(void) k = 
sdeb_zbc_model_str(sdeb_zbc_model_s); if (k < 0) { ret = k; - goto free_vm; + goto free_q_arr; } sdeb_zbc_model = k; switch (sdeb_zbc_model) { @@ -6753,7 +6753,8 @@ static int __init scsi_debug_init(void) break; default: pr_err("Invalid ZBC model\n"); - return -EINVAL; + ret = -EINVAL; + goto free_q_arr; } } if (sdeb_zbc_model != BLK_ZONED_NONE) { diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 03c6d0620bfd0e..31d7a6ddc9db79 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -249,7 +249,8 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, req = blk_get_request(sdev->request_queue, data_direction == DMA_TO_DEVICE ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, + rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); if (IS_ERR(req)) return ret; rq = scsi_req(req); @@ -1203,6 +1204,8 @@ static blk_status_t scsi_device_state_check(struct scsi_device *sdev, struct request *req) { switch (sdev->sdev_state) { + case SDEV_CREATED: + return BLK_STS_OK; case SDEV_OFFLINE: case SDEV_TRANSPORT_OFFLINE: /* @@ -1229,18 +1232,18 @@ scsi_device_state_check(struct scsi_device *sdev, struct request *req) return BLK_STS_RESOURCE; case SDEV_QUIESCE: /* - * If the devices is blocked we defer normal commands. + * If the device is blocked we only accept power management + * commands. */ - if (req && !(req->rq_flags & RQF_PREEMPT)) + if (req && WARN_ON_ONCE(!(req->rq_flags & RQF_PM))) return BLK_STS_RESOURCE; return BLK_STS_OK; default: /* * For any other not fully online state we only allow - * special commands. In particular any user initiated - * command is not allowed. + * power management commands. 
*/ - if (req && !(req->rq_flags & RQF_PREEMPT)) + if (req && !(req->rq_flags & RQF_PM)) return BLK_STS_IOERR; return BLK_STS_OK; } @@ -2508,15 +2511,13 @@ void sdev_evt_send_simple(struct scsi_device *sdev, EXPORT_SYMBOL_GPL(sdev_evt_send_simple); /** - * scsi_device_quiesce - Block user issued commands. + * scsi_device_quiesce - Block all commands except power management. * @sdev: scsi device to quiesce. * * This works by trying to transition to the SDEV_QUIESCE state * (which must be a legal transition). When the device is in this - * state, only special requests will be accepted, all others will - * be deferred. Since special requests may also be requeued requests, - * a successful return doesn't guarantee the device will be - * totally quiescent. + * state, only power management requests will be accepted, all others will + * be deferred. * * Must be called with user context, may sleep. * @@ -2578,12 +2579,12 @@ void scsi_device_resume(struct scsi_device *sdev) * device deleted during suspend) */ mutex_lock(&sdev->state_mutex); + if (sdev->sdev_state == SDEV_QUIESCE) + scsi_device_set_state(sdev, SDEV_RUNNING); if (sdev->quiesced_by) { sdev->quiesced_by = NULL; blk_clear_pm_only(sdev->request_queue); } - if (sdev->sdev_state == SDEV_QUIESCE) - scsi_device_set_state(sdev, SDEV_RUNNING); mutex_unlock(&sdev->state_mutex); } EXPORT_SYMBOL(scsi_device_resume); @@ -2948,6 +2949,78 @@ void sdev_enable_disk_events(struct scsi_device *sdev) } EXPORT_SYMBOL(sdev_enable_disk_events); +static unsigned char designator_prio(const unsigned char *d) +{ + if (d[1] & 0x30) + /* not associated with LUN */ + return 0; + + if (d[3] == 0) + /* invalid length */ + return 0; + + /* + * Order of preference for lun descriptor: + * - SCSI name string + * - NAA IEEE Registered Extended + * - EUI-64 based 16-byte + * - EUI-64 based 12-byte + * - NAA IEEE Registered + * - NAA IEEE Extended + * - EUI-64 based 8-byte + * - SCSI name string (truncated) + * - T10 Vendor ID + * as longer 
descriptors reduce the likelyhood + * of identification clashes. + */ + + switch (d[1] & 0xf) { + case 8: + /* SCSI name string, variable-length UTF-8 */ + return 9; + case 3: + switch (d[4] >> 4) { + case 6: + /* NAA registered extended */ + return 8; + case 5: + /* NAA registered */ + return 5; + case 4: + /* NAA extended */ + return 4; + case 3: + /* NAA locally assigned */ + return 1; + default: + break; + } + break; + case 2: + switch (d[3]) { + case 16: + /* EUI64-based, 16 byte */ + return 7; + case 12: + /* EUI64-based, 12 byte */ + return 6; + case 8: + /* EUI64-based, 8 byte */ + return 3; + default: + break; + } + break; + case 1: + /* T10 vendor ID */ + return 1; + default: + break; + } + + return 0; +} + /** * scsi_vpd_lun_id - return a unique device identification * @sdev: SCSI device @@ -2964,7 +3037,7 @@ EXPORT_SYMBOL(sdev_enable_disk_events); */ int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len) { - u8 cur_id_type = 0xff; + u8 cur_id_prio = 0; u8 cur_id_size = 0; const unsigned char *d, *cur_id_str; const struct scsi_vpd *vpd_pg83; @@ -2977,20 +3050,6 @@ int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len) return -ENXIO; } - /* - * Look for the correct descriptor. - * Order of preference for lun descriptor: - * - SCSI name string - * - NAA IEEE Registered Extended - * - EUI-64 based 16-byte - * - EUI-64 based 12-byte - * - NAA IEEE Registered - * - NAA IEEE Extended - * - T10 Vendor ID - * as longer descriptors reduce the likelyhood - * of identification clashes. 
- */ - /* The id string must be at least 20 bytes + terminating NULL byte */ if (id_len < 21) { rcu_read_unlock(); @@ -3000,8 +3059,9 @@ int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len) memset(id, 0, id_len); d = vpd_pg83->data + 4; while (d < vpd_pg83->data + vpd_pg83->len) { - /* Skip designators not referring to the LUN */ - if ((d[1] & 0x30) != 0x00) + u8 prio = designator_prio(d); + + if (prio == 0 || cur_id_prio > prio) goto next_desig; switch (d[1] & 0xf) { @@ -3009,28 +3069,19 @@ int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len) /* T10 Vendor ID */ if (cur_id_size > d[3]) break; - /* Prefer anything */ - if (cur_id_type > 0x01 && cur_id_type != 0xff) - break; + cur_id_prio = prio; cur_id_size = d[3]; if (cur_id_size + 4 > id_len) cur_id_size = id_len - 4; cur_id_str = d + 4; - cur_id_type = d[1] & 0xf; id_size = snprintf(id, id_len, "t10.%*pE", cur_id_size, cur_id_str); break; case 0x2: /* EUI-64 */ - if (cur_id_size > d[3]) - break; - /* Prefer NAA IEEE Registered Extended */ - if (cur_id_type == 0x3 && - cur_id_size == d[3]) - break; + cur_id_prio = prio; cur_id_size = d[3]; cur_id_str = d + 4; - cur_id_type = d[1] & 0xf; switch (cur_id_size) { case 8: id_size = snprintf(id, id_len, @@ -3048,17 +3099,14 @@ int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len) cur_id_str); break; default: - cur_id_size = 0; break; } break; case 0x3: /* NAA */ - if (cur_id_size > d[3]) - break; + cur_id_prio = prio; cur_id_size = d[3]; cur_id_str = d + 4; - cur_id_type = d[1] & 0xf; switch (cur_id_size) { case 8: id_size = snprintf(id, id_len, @@ -3071,26 +3119,25 @@ int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len) cur_id_str); break; default: - cur_id_size = 0; break; } break; case 0x8: /* SCSI name string */ - if (cur_id_size + 4 > d[3]) + if (cur_id_size > d[3]) break; /* Prefer others for truncated descriptor */ - if (cur_id_size && d[3] > id_len) - break; + if (d[3] > id_len) { + prio = 
2; + if (cur_id_prio > prio) + break; + } + cur_id_prio = prio; cur_id_size = id_size = d[3]; cur_id_str = d + 4; - cur_id_type = d[1] & 0xf; if (cur_id_size >= id_len) cur_id_size = id_len - 1; memcpy(id, cur_id_str, cur_id_size); - /* Decrease priority for truncated descriptor */ - if (cur_id_size != id_size) - cur_id_size = 6; break; default: break; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 2eb3e4f9375a57..2e68c0a876986d 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2313,7 +2313,9 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) return conn; release_conn_ref: - put_device(&conn->dev); + device_unregister(&conn->dev); + put_device(&session->dev); + return NULL; release_parent_ref: put_device(&session->dev); free_conn: diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index f3d5b1bbd5aa7e..c37dd15d16d24f 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -117,12 +117,16 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd, sshdr = &sshdr_tmp; for(i = 0; i < DV_RETRIES; i++) { + /* + * The purpose of the RQF_PM flag below is to bypass the + * SDEV_QUIESCE state. 
+ */ result = scsi_execute(sdev, cmd, dir, buffer, bufflen, sense, sshdr, DV_TIMEOUT, /* retries */ 1, REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER, - 0, NULL); + RQF_PM, NULL); if (driver_byte(result) != DRIVER_SENSE || sshdr->sense_key != UNIT_ATTENTION) break; @@ -1005,23 +1009,26 @@ spi_dv_device(struct scsi_device *sdev) */ lock_system_sleep(); + if (scsi_autopm_get_device(sdev)) + goto unlock_system_sleep; + if (unlikely(spi_dv_in_progress(starget))) - goto unlock; + goto put_autopm; if (unlikely(scsi_device_get(sdev))) - goto unlock; + goto put_autopm; spi_dv_in_progress(starget) = 1; buffer = kzalloc(len, GFP_KERNEL); if (unlikely(!buffer)) - goto out_put; + goto put_sdev; /* We need to verify that the actual device will quiesce; the * later target quiesce is just a nice to have */ if (unlikely(scsi_device_quiesce(sdev))) - goto out_free; + goto free_buffer; scsi_target_quiesce(starget); @@ -1041,12 +1048,16 @@ spi_dv_device(struct scsi_device *sdev) spi_initial_dv(starget) = 1; - out_free: +free_buffer: kfree(buffer); - out_put: + +put_sdev: spi_dv_in_progress(starget) = 0; scsi_device_put(sdev); -unlock: +put_autopm: + scsi_autopm_put_device(sdev); + +unlock_system_sleep: unlock_system_sleep(); } EXPORT_SYMBOL(spi_dv_device); diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index cba1cf6a1c12dc..1e939a2a387f3f 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -541,7 +541,14 @@ int srp_reconnect_rport(struct srp_rport *rport) res = mutex_lock_interruptible(&rport->mutex); if (res) goto out; - scsi_target_block(&shost->shost_gendev); + if (rport->state != SRP_RPORT_FAIL_FAST) + /* + * sdev state must be SDEV_TRANSPORT_OFFLINE, transition + * to SDEV_BLOCK is illegal. Calling scsi_target_unblock() + * later is ok though, scsi_internal_device_unblock_nowait() + * treats SDEV_TRANSPORT_OFFLINE like SDEV_BLOCK. 
+ */ + scsi_target_block(&shost->shost_gendev); res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV; pr_debug("%s (state %d): transport.reconnect() returned %d\n", dev_name(&shost->shost_gendev), rport->state, res); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 656bcf4940d6d1..fedb89d4ac3f0b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -986,8 +986,10 @@ static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) } } - if (sdp->no_write_same) + if (sdp->no_write_same) { + rq->rq_flags |= RQF_QUIET; return BLK_STS_TARGET; + } if (sdkp->ws16 || lba > 0xffffffff || nr_blocks > 0xffff) return sd_setup_write_same16_cmnd(cmd, false); diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig index dcdb4eb1f90ba5..c339517b7a0944 100644 --- a/drivers/scsi/ufs/Kconfig +++ b/drivers/scsi/ufs/Kconfig @@ -72,6 +72,7 @@ config SCSI_UFS_DWC_TC_PCI config SCSI_UFSHCD_PLATFORM tristate "Platform bus based UFS Controller support" depends on SCSI_UFSHCD + depends on HAS_IOMEM help This selects the UFS host controller support. Select this if you have an UFS controller on Platform bus. 
diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 8df73bc2f8cb24..914a827a93ee8c 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -743,7 +743,7 @@ static int ufs_mtk_link_startup_notify(struct ufs_hba *hba, return ret; } -static void ufs_mtk_device_reset(struct ufs_hba *hba) +static int ufs_mtk_device_reset(struct ufs_hba *hba) { struct arm_smccc_res res; @@ -764,6 +764,8 @@ static void ufs_mtk_device_reset(struct ufs_hba *hba) usleep_range(10000, 15000); dev_info(hba->dev, "device reset done\n"); + + return 0; } static int ufs_mtk_link_set_hpm(struct ufs_hba *hba) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index f9d6ef3565407c..a244c8ae1b4eb3 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1421,13 +1421,13 @@ static void ufs_qcom_dump_dbg_regs(struct ufs_hba *hba) * * Toggles the (optional) reset line to reset the attached device. */ -static void ufs_qcom_device_reset(struct ufs_hba *hba) +static int ufs_qcom_device_reset(struct ufs_hba *hba) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); /* reset gpio is optional */ if (!host->device_reset) - return; + return -EOPNOTSUPP; /* * The UFS device shall detect reset pulses of 1us, sleep for 10us to @@ -1438,6 +1438,8 @@ static void ufs_qcom_device_reset(struct ufs_hba *hba) gpiod_set_value_cansleep(host->device_reset, 0); usleep_range(10, 15); + + return 0; } #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index df3a564c3e3348..fadd566025b86a 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -148,6 +148,8 @@ static int ufs_intel_common_init(struct ufs_hba *hba) { struct intel_host *host; + hba->caps |= UFSHCD_CAP_RPM_AUTOSUSPEND; + host = devm_kzalloc(hba->dev, sizeof(*host), GFP_KERNEL); if (!host) return -ENOMEM; @@ -163,6 +165,41 @@ static void 
ufs_intel_common_exit(struct ufs_hba *hba) intel_ltr_hide(hba->dev); } +static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op) +{ + /* + * To support S4 (suspend-to-disk) with spm_lvl other than 5, the base + * address registers must be restored because the restore kernel can + * have used different addresses. + */ + ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr), + REG_UTP_TRANSFER_REQ_LIST_BASE_L); + ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr), + REG_UTP_TRANSFER_REQ_LIST_BASE_H); + ufshcd_writel(hba, lower_32_bits(hba->utmrdl_dma_addr), + REG_UTP_TASK_REQ_LIST_BASE_L); + ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr), + REG_UTP_TASK_REQ_LIST_BASE_H); + + if (ufshcd_is_link_hibern8(hba)) { + int ret = ufshcd_uic_hibern8_exit(hba); + + if (!ret) { + ufshcd_set_link_active(hba); + } else { + dev_err(hba->dev, "%s: hibern8 exit failed %d\n", + __func__, ret); + /* + * Force reset and restore. Any other actions can lead + * to an unrecoverable state. + */ + ufshcd_set_link_off(hba); + } + } + + return 0; +} + static int ufs_intel_ehl_init(struct ufs_hba *hba) { hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; @@ -174,6 +211,7 @@ static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = { .init = ufs_intel_common_init, .exit = ufs_intel_common_exit, .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, }; static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = { @@ -181,6 +219,7 @@ static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = { .init = ufs_intel_ehl_init, .exit = ufs_intel_common_exit, .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, }; #ifdef CONFIG_PM_SLEEP @@ -207,6 +246,30 @@ static int ufshcd_pci_resume(struct device *dev) { return ufshcd_system_resume(dev_get_drvdata(dev)); } + +/** + * ufshcd_pci_poweroff - suspend-to-disk poweroff function + * @dev: pointer to PCI device handle + * + * Returns 0 if successful + * Returns non-zero otherwise + 
*/ +static int ufshcd_pci_poweroff(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + int spm_lvl = hba->spm_lvl; + int ret; + + /* + * For poweroff we need to set the UFS device to PowerDown mode. + * Force spm_lvl to ensure that. + */ + hba->spm_lvl = 5; + ret = ufshcd_system_suspend(hba); + hba->spm_lvl = spm_lvl; + return ret; +} + #endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_PM @@ -302,8 +365,14 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } static const struct dev_pm_ops ufshcd_pci_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(ufshcd_pci_suspend, - ufshcd_pci_resume) +#ifdef CONFIG_PM_SLEEP + .suspend = ufshcd_pci_suspend, + .resume = ufshcd_pci_resume, + .freeze = ufshcd_pci_suspend, + .thaw = ufshcd_pci_resume, + .poweroff = ufshcd_pci_poweroff, + .restore = ufshcd_pci_resume, +#endif SET_RUNTIME_PM_OPS(ufshcd_pci_runtime_suspend, ufshcd_pci_runtime_resume, ufshcd_pci_runtime_idle) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 0c148fcd24debc..81328932846706 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -283,7 +283,8 @@ static inline void ufshcd_wb_config(struct ufs_hba *hba) if (ret) dev_err(hba->dev, "%s: En WB flush during H8: failed: %d\n", __func__, ret); - ufshcd_wb_toggle_flush(hba, true); + if (!(hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL)) + ufshcd_wb_toggle_flush(hba, true); } static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba) @@ -1751,8 +1752,9 @@ static void __ufshcd_release(struct ufs_hba *hba) if (hba->clk_gating.active_reqs || hba->clk_gating.is_suspended || hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL || - ufshcd_any_tag_in_use(hba) || hba->outstanding_tasks || - hba->active_uic_cmd || hba->uic_async_done) + hba->outstanding_tasks || + hba->active_uic_cmd || hba->uic_async_done || + hba->clk_gating.state == CLKS_OFF) return; hba->clk_gating.state = REQ_CLKS_OFF; @@ -3619,7 +3621,7 @@ static int ufshcd_dme_enable(struct ufs_hba 
*hba) ret = ufshcd_send_uic_cmd(hba, &uic_cmd); if (ret) dev_err(hba->dev, - "dme-reset: error code %d\n", ret); + "dme-enable: error code %d\n", ret); return ret; } @@ -4911,7 +4913,8 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) break; } /* end of switch */ - if ((host_byte(result) != DID_OK) && !hba->silence_err_logs) + if ((host_byte(result) != DID_OK) && + (host_byte(result) != DID_REQUEUE) && !hba->silence_err_logs) ufshcd_print_trs(hba, 1 << lrbp->task_tag, true); return result; } @@ -5352,9 +5355,6 @@ static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set) static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable) { - if (hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL) - return; - if (enable) ufshcd_wb_buf_flush_enable(hba); else @@ -6209,9 +6209,13 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); } - if (enabled_intr_status && retval == IRQ_NONE) { - dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x\n", - __func__, intr_status); + if (enabled_intr_status && retval == IRQ_NONE && + !ufshcd_eh_in_progress(hba)) { + dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x (0x%08x, 0x%08x)\n", + __func__, + intr_status, + hba->ufs_stats.last_intr_status, + enabled_intr_status); ufshcd_dump_regs(hba, 0, UFSHCI_REG_SPACE_SIZE, "host_regs: "); } @@ -6255,7 +6259,10 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, * Even though we use wait_event() which sleeps indefinitely, * the maximum wait time is bounded by %TM_CMD_TIMEOUT. 
*/ - req = blk_get_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_RESERVED); + req = blk_get_request(q, REQ_OP_DRV_OUT, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + req->end_io_data = &wait; free_slot = req->tag; WARN_ON_ONCE(free_slot < 0 || free_slot >= hba->nutmrs); @@ -6568,19 +6575,16 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) { struct Scsi_Host *host; struct ufs_hba *hba; - unsigned int tag; u32 pos; int err; - u8 resp = 0xF; - struct ufshcd_lrb *lrbp; + u8 resp = 0xF, lun; unsigned long flags; host = cmd->device->host; hba = shost_priv(host); - tag = cmd->request->tag; - lrbp = &hba->lrb[tag]; - err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp); + lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun); + err = ufshcd_issue_tm_cmd(hba, lun, 0, UFS_LOGICAL_RESET, &resp); if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) { if (!err) err = resp; @@ -6589,7 +6593,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) /* clear the commands that were pending for corresponding LUN */ for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) { - if (hba->lrb[pos].lun == lrbp->lun) { + if (hba->lrb[pos].lun == lun) { err = ufshcd_clear_cmd(hba, pos); if (err) break; @@ -7092,7 +7096,6 @@ static inline void ufshcd_blk_pm_runtime_init(struct scsi_device *sdev) static int ufshcd_scsi_add_wlus(struct ufs_hba *hba) { int ret = 0; - struct scsi_device *sdev_rpmb; struct scsi_device *sdev_boot; hba->sdev_ufs_device = __scsi_add_device(hba->host, 0, 0, @@ -7105,14 +7108,14 @@ static int ufshcd_scsi_add_wlus(struct ufs_hba *hba) ufshcd_blk_pm_runtime_init(hba->sdev_ufs_device); scsi_device_put(hba->sdev_ufs_device); - sdev_rpmb = __scsi_add_device(hba->host, 0, 0, + hba->sdev_rpmb = __scsi_add_device(hba->host, 0, 0, ufshcd_upiu_wlun_to_scsi_wlun(UFS_UPIU_RPMB_WLUN), NULL); - if (IS_ERR(sdev_rpmb)) { - ret = PTR_ERR(sdev_rpmb); + if (IS_ERR(hba->sdev_rpmb)) { + ret = PTR_ERR(hba->sdev_rpmb); goto remove_sdev_ufs_device; } - 
ufshcd_blk_pm_runtime_init(sdev_rpmb); - scsi_device_put(sdev_rpmb); + ufshcd_blk_pm_runtime_init(hba->sdev_rpmb); + scsi_device_put(hba->sdev_rpmb); sdev_boot = __scsi_add_device(hba->host, 0, 0, ufshcd_upiu_wlun_to_scsi_wlun(UFS_UPIU_BOOT_WLUN), NULL); @@ -7636,6 +7639,63 @@ static int ufshcd_add_lus(struct ufs_hba *hba) return ret; } +static int +ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp); + +static int ufshcd_clear_ua_wlun(struct ufs_hba *hba, u8 wlun) +{ + struct scsi_device *sdp; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(hba->host->host_lock, flags); + if (wlun == UFS_UPIU_UFS_DEVICE_WLUN) + sdp = hba->sdev_ufs_device; + else if (wlun == UFS_UPIU_RPMB_WLUN) + sdp = hba->sdev_rpmb; + else + BUG(); + if (sdp) { + ret = scsi_device_get(sdp); + if (!ret && !scsi_device_online(sdp)) { + ret = -ENODEV; + scsi_device_put(sdp); + } + } else { + ret = -ENODEV; + } + spin_unlock_irqrestore(hba->host->host_lock, flags); + if (ret) + goto out_err; + + ret = ufshcd_send_request_sense(hba, sdp); + scsi_device_put(sdp); +out_err: + if (ret) + dev_err(hba->dev, "%s: UAC clear LU=%x ret = %d\n", + __func__, wlun, ret); + return ret; +} + +static int ufshcd_clear_ua_wluns(struct ufs_hba *hba) +{ + int ret = 0; + + if (!hba->wlun_dev_clr_ua) + goto out; + + ret = ufshcd_clear_ua_wlun(hba, UFS_UPIU_UFS_DEVICE_WLUN); + if (!ret) + ret = ufshcd_clear_ua_wlun(hba, UFS_UPIU_RPMB_WLUN); + if (!ret) + hba->wlun_dev_clr_ua = false; +out: + if (ret) + dev_err(hba->dev, "%s: Failed to clear UAC WLUNS ret = %d\n", + __func__, ret); + return ret; +} + /** * ufshcd_probe_hba - probe hba to detect device and initialize * @hba: per-adapter instance @@ -7755,6 +7815,8 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie) pm_runtime_put_sync(hba->dev); ufshcd_exit_clk_scaling(hba); ufshcd_hba_exit(hba); + } else { + ufshcd_clear_ua_wluns(hba); } } @@ -8759,7 +8821,8 @@ int ufshcd_system_suspend(struct ufs_hba *hba) if 
((ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl) == hba->curr_dev_pwr_mode) && (ufs_get_pm_lvl_to_link_pwr_state(hba->spm_lvl) == - hba->uic_link_state)) + hba->uic_link_state) && + !hba->dev_info.b_rpm_dev_flush_capable) goto out; if (pm_runtime_suspended(hba->dev)) { diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index e0f00a42371c52..6c62a281c86311 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -318,7 +318,7 @@ struct ufs_hba_variant_ops { int (*resume)(struct ufs_hba *, enum ufs_pm_op); void (*dbg_register_dump)(struct ufs_hba *hba); int (*phy_initialization)(struct ufs_hba *); - void (*device_reset)(struct ufs_hba *hba); + int (*device_reset)(struct ufs_hba *hba); void (*config_scaling_param)(struct ufs_hba *hba, struct devfreq_dev_profile *profile, void *data); @@ -683,6 +683,7 @@ struct ufs_hba { * "UFS device" W-LU. */ struct scsi_device *sdev_ufs_device; + struct scsi_device *sdev_rpmb; enum ufs_dev_pwr_mode curr_dev_pwr_mode; enum uic_link_state uic_link_state; @@ -1181,9 +1182,17 @@ static inline void ufshcd_vops_dbg_register_dump(struct ufs_hba *hba) static inline void ufshcd_vops_device_reset(struct ufs_hba *hba) { if (hba->vops && hba->vops->device_reset) { - hba->vops->device_reset(hba); - ufshcd_set_ufs_dev_active(hba); - ufshcd_update_reg_hist(&hba->ufs_stats.dev_reset, 0); + int err = hba->vops->device_reset(hba); + + if (!err) { + ufshcd_set_ufs_dev_active(hba); + if (ufshcd_is_wb_allowed(hba)) { + hba->wb_enabled = false; + hba->wb_buf_flush_enabled = false; + } + } + if (err != -EOPNOTSUPP) + ufshcd_update_reg_hist(&hba->ufs_stats.dev_reset, err); } } diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c index 4aad2566f52d27..f04b961b96cd4d 100644 --- a/drivers/slimbus/qcom-ctrl.c +++ b/drivers/slimbus/qcom-ctrl.c @@ -472,15 +472,10 @@ static void qcom_slim_rxwq(struct work_struct *work) static void qcom_slim_prg_slew(struct platform_device *pdev, struct qcom_slim_ctrl *ctrl) { - 
struct resource *slew_mem; - if (!ctrl->slew_reg) { /* SLEW RATE register for this SLIMbus */ - slew_mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, - "slew"); - ctrl->slew_reg = devm_ioremap(&pdev->dev, slew_mem->start, - resource_size(slew_mem)); - if (!ctrl->slew_reg) + ctrl->slew_reg = devm_platform_ioremap_resource_byname(pdev, "slew"); + if (IS_ERR(ctrl->slew_reg)) return; } diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index 218aefc3531cd0..50cfd67c2871e8 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1205,6 +1205,9 @@ static int qcom_slim_ngd_runtime_resume(struct device *dev) struct qcom_slim_ngd_ctrl *ctrl = dev_get_drvdata(dev); int ret = 0; + if (!ctrl->qmi.handle) + return 0; + if (ctrl->state >= QCOM_SLIM_NGD_CTRL_ASLEEP) ret = qcom_slim_ngd_power_up(ctrl); if (ret) { @@ -1503,6 +1506,9 @@ static int __maybe_unused qcom_slim_ngd_runtime_suspend(struct device *dev) struct qcom_slim_ngd_ctrl *ctrl = dev_get_drvdata(dev); int ret = 0; + if (!ctrl->qmi.handle) + return 0; + ret = qcom_slim_qmi_power_request(ctrl, false); if (ret && ret != -EBUSY) dev_info(ctrl->dev, "slim resource not idle:%d\n", ret); diff --git a/drivers/soc/amlogic/meson-canvas.c b/drivers/soc/amlogic/meson-canvas.c index c655f5f92b1241..d0329ad170d13b 100644 --- a/drivers/soc/amlogic/meson-canvas.c +++ b/drivers/soc/amlogic/meson-canvas.c @@ -72,8 +72,10 @@ struct meson_canvas *meson_canvas_get(struct device *dev) * current state, this driver probe cannot return -EPROBE_DEFER */ canvas = dev_get_drvdata(&canvas_pdev->dev); - if (!canvas) + if (!canvas) { + put_device(&canvas_pdev->dev); return ERR_PTR(-EINVAL); + } return canvas; } diff --git a/drivers/soc/atmel/soc.c b/drivers/soc/atmel/soc.c index 55a1f57a4d8cb0..5d06ee70a36b98 100644 --- a/drivers/soc/atmel/soc.c +++ b/drivers/soc/atmel/soc.c @@ -265,8 +265,21 @@ struct soc_device * __init at91_soc_init(const struct at91_soc *socs) return soc_dev; } 
+static const struct of_device_id at91_soc_allowed_list[] __initconst = { + { .compatible = "atmel,at91rm9200", }, + { .compatible = "atmel,at91sam9", }, + { .compatible = "atmel,sama5", }, + { .compatible = "atmel,samv7", }, + { } +}; + static int __init atmel_soc_device_init(void) { + struct device_node *np = of_find_node_by_path("/"); + + if (!of_match_node(at91_soc_allowed_list, np)) + return 0; + at91_soc_init(socs); return 0; diff --git a/drivers/soc/imx/Kconfig b/drivers/soc/imx/Kconfig index a9370f4aacca97..05812f8ae73406 100644 --- a/drivers/soc/imx/Kconfig +++ b/drivers/soc/imx/Kconfig @@ -13,7 +13,7 @@ config SOC_IMX8M depends on ARCH_MXC || COMPILE_TEST default ARCH_MXC && ARM64 select SOC_BUS - select ARM_GIC_V3 if ARCH_MXC + select ARM_GIC_V3 if ARCH_MXC && ARCH_MULTI_V7 help If you say yes here you get support for the NXP i.MX8M family support, it will provide the SoC info like SoC family, diff --git a/drivers/soc/mediatek/mtk-scpsys.c b/drivers/soc/mediatek/mtk-scpsys.c index f669d3754627da..ca75b14931ec9e 100644 --- a/drivers/soc/mediatek/mtk-scpsys.c +++ b/drivers/soc/mediatek/mtk-scpsys.c @@ -524,6 +524,7 @@ static void mtk_register_power_domains(struct platform_device *pdev, for (i = 0; i < num; i++) { struct scp_domain *scpd = &scp->domains[i]; struct generic_pm_domain *genpd = &scpd->genpd; + bool on; /* * Initially turn on all domains to make the domains usable @@ -531,9 +532,9 @@ static void mtk_register_power_domains(struct platform_device *pdev, * software. The unused domains will be switched off during * late_init time. 
*/ - genpd->power_on(genpd); + on = !WARN_ON(genpd->power_on(genpd) < 0); - pm_genpd_init(genpd, NULL, false); + pm_genpd_init(genpd, NULL, !on); } /* diff --git a/drivers/soc/qcom/pdr_interface.c b/drivers/soc/qcom/pdr_interface.c index 088dc99f77f3fe..f63135c09667f9 100644 --- a/drivers/soc/qcom/pdr_interface.c +++ b/drivers/soc/qcom/pdr_interface.c @@ -569,7 +569,7 @@ EXPORT_SYMBOL(pdr_add_lookup); int pdr_restart_pd(struct pdr_handle *pdr, struct pdr_service *pds) { struct servreg_restart_pd_resp resp; - struct servreg_restart_pd_req req; + struct servreg_restart_pd_req req = { 0 }; struct sockaddr_qrtr addr; struct pdr_service *tmp; struct qmi_txn txn; diff --git a/drivers/soc/qcom/qcom-geni-se.c b/drivers/soc/qcom/qcom-geni-se.c index d0e4f520cff8cf..751a49f6534f45 100644 --- a/drivers/soc/qcom/qcom-geni-se.c +++ b/drivers/soc/qcom/qcom-geni-se.c @@ -289,10 +289,23 @@ static void geni_se_select_fifo_mode(struct geni_se *se) static void geni_se_select_dma_mode(struct geni_se *se) { + u32 proto = geni_se_read_proto(se); u32 val; geni_se_irq_clear(se); + val = readl_relaxed(se->base + SE_GENI_M_IRQ_EN); + if (proto != GENI_SE_UART) { + val &= ~(M_CMD_DONE_EN | M_TX_FIFO_WATERMARK_EN); + val &= ~(M_RX_FIFO_WATERMARK_EN | M_RX_FIFO_LAST_EN); + } + writel_relaxed(val, se->base + SE_GENI_M_IRQ_EN); + + val = readl_relaxed(se->base + SE_GENI_S_IRQ_EN); + if (proto != GENI_SE_UART) + val &= ~S_CMD_DONE_EN; + writel_relaxed(val, se->base + SE_GENI_S_IRQ_EN); + val = readl_relaxed(se->base + SE_GENI_DMA_MODE_EN); val |= GENI_DMA_MODE_EN; writel_relaxed(val, se->base + SE_GENI_DMA_MODE_EN); @@ -651,7 +664,7 @@ int geni_se_tx_dma_prep(struct geni_se *se, void *buf, size_t len, writel_relaxed(lower_32_bits(*iova), se->base + SE_DMA_TX_PTR_L); writel_relaxed(upper_32_bits(*iova), se->base + SE_DMA_TX_PTR_H); writel_relaxed(GENI_SE_DMA_EOT_BUF, se->base + SE_DMA_TX_ATTR); - writel_relaxed(len, se->base + SE_DMA_TX_LEN); + writel(len, se->base + SE_DMA_TX_LEN); return 0; } 
EXPORT_SYMBOL(geni_se_tx_dma_prep); @@ -688,7 +701,7 @@ int geni_se_rx_dma_prep(struct geni_se *se, void *buf, size_t len, writel_relaxed(upper_32_bits(*iova), se->base + SE_DMA_RX_PTR_H); /* RX does not have EOT buffer type bit. So just reset RX_ATTR */ writel_relaxed(0, se->base + SE_DMA_RX_ATTR); - writel_relaxed(len, se->base + SE_DMA_RX_LEN); + writel(len, se->base + SE_DMA_RX_LEN); return 0; } EXPORT_SYMBOL(geni_se_rx_dma_prep); diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c index 07183d731d7475..a9709aae54abbf 100644 --- a/drivers/soc/qcom/smp2p.c +++ b/drivers/soc/qcom/smp2p.c @@ -318,15 +318,16 @@ static int qcom_smp2p_inbound_entry(struct qcom_smp2p *smp2p, static int smp2p_update_bits(void *data, u32 mask, u32 value) { struct smp2p_entry *entry = data; + unsigned long flags; u32 orig; u32 val; - spin_lock(&entry->lock); + spin_lock_irqsave(&entry->lock, flags); val = orig = readl(entry->value); val &= ~mask; val |= value; writel(val, entry->value); - spin_unlock(&entry->lock); + spin_unlock_irqrestore(&entry->lock, flags); if (val != orig) qcom_smp2p_kick(entry->smp2p); diff --git a/drivers/soc/renesas/rmobile-sysc.c b/drivers/soc/renesas/rmobile-sysc.c index 54b616ad4a62aa..beb1c7211c3d62 100644 --- a/drivers/soc/renesas/rmobile-sysc.c +++ b/drivers/soc/renesas/rmobile-sysc.c @@ -327,6 +327,7 @@ static int __init rmobile_init_pm_domains(void) pmd = of_get_child_by_name(np, "pm-domains"); if (!pmd) { + iounmap(base); pr_warn("%pOF lacks pm-domains node\n", np); continue; } diff --git a/drivers/soc/rockchip/io-domain.c b/drivers/soc/rockchip/io-domain.c index eece97f97ef8f0..b29e829e815e5b 100644 --- a/drivers/soc/rockchip/io-domain.c +++ b/drivers/soc/rockchip/io-domain.c @@ -547,6 +547,7 @@ static int rockchip_iodomain_probe(struct platform_device *pdev) if (uV < 0) { dev_err(iod->dev, "Can't determine voltage: %s\n", supply_name); + ret = uV; goto unreg_notify; } diff --git a/drivers/soc/tegra/fuse/speedo-tegra210.c 
b/drivers/soc/tegra/fuse/speedo-tegra210.c index 70d3f6e1aa33d2..8050742237b76a 100644 --- a/drivers/soc/tegra/fuse/speedo-tegra210.c +++ b/drivers/soc/tegra/fuse/speedo-tegra210.c @@ -94,7 +94,7 @@ static int get_process_id(int value, const u32 *speedos, unsigned int num) unsigned int i; for (i = 0; i < num; i++) - if (value < speedos[num]) + if (value < speedos[i]) return i; return -EINVAL; diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c index 8c863ecb1c6058..56597f6ea666a9 100644 --- a/drivers/soc/ti/knav_dma.c +++ b/drivers/soc/ti/knav_dma.c @@ -749,8 +749,9 @@ static int knav_dma_probe(struct platform_device *pdev) pm_runtime_enable(kdev->dev); ret = pm_runtime_get_sync(kdev->dev); if (ret < 0) { + pm_runtime_put_noidle(kdev->dev); dev_err(kdev->dev, "unable to enable pktdma, err %d\n", ret); - return ret; + goto err_pm_disable; } /* Initialise all packet dmas */ @@ -764,7 +765,8 @@ static int knav_dma_probe(struct platform_device *pdev) if (list_empty(&kdev->list)) { dev_err(dev, "no valid dma instance\n"); - return -ENODEV; + ret = -ENODEV; + goto err_put_sync; } debugfs_create_file("knav_dma", S_IFREG | S_IRUGO, NULL, NULL, @@ -772,6 +774,13 @@ static int knav_dma_probe(struct platform_device *pdev) device_ready = true; return ret; + +err_put_sync: + pm_runtime_put_sync(kdev->dev); +err_pm_disable: + pm_runtime_disable(kdev->dev); + + return ret; } static int knav_dma_remove(struct platform_device *pdev) diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c index a460f201bf8e70..53e36d4328d1e9 100644 --- a/drivers/soc/ti/knav_qmss_queue.c +++ b/drivers/soc/ti/knav_qmss_queue.c @@ -1784,6 +1784,7 @@ static int knav_queue_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) { + pm_runtime_put_noidle(&pdev->dev); dev_err(dev, "Failed to enable QMSS\n"); return ret; } @@ -1851,9 +1852,10 @@ static int knav_queue_probe(struct platform_device *pdev) if 
(ret) goto err; - regions = of_get_child_by_name(node, "descriptor-regions"); + regions = of_get_child_by_name(node, "descriptor-regions"); if (!regions) { dev_err(dev, "descriptor-regions not specified\n"); + ret = -ENODEV; goto err; } ret = knav_queue_setup_regions(kdev, regions); diff --git a/drivers/soc/ti/omap_prm.c b/drivers/soc/ti/omap_prm.c index 980b04c38fd940..4d41dc3cdce1f2 100644 --- a/drivers/soc/ti/omap_prm.c +++ b/drivers/soc/ti/omap_prm.c @@ -484,6 +484,10 @@ static int omap_reset_deassert(struct reset_controller_dev *rcdev, struct ti_prm_platform_data *pdata = dev_get_platdata(reset->dev); int ret = 0; + /* Nothing to do if the reset is already deasserted */ + if (!omap_reset_status(rcdev, id)) + return 0; + has_rstst = reset->prm->data->rstst || (reset->prm->data->flags & OMAP_PRM_HAS_RSTST); diff --git a/drivers/soundwire/master.c b/drivers/soundwire/master.c index 3488bb824e845f..9b05c9e25ebe48 100644 --- a/drivers/soundwire/master.c +++ b/drivers/soundwire/master.c @@ -8,6 +8,15 @@ #include #include "bus.h" +/* + * The 3s value for autosuspend will only be used if there are no + * devices physically attached on a bus segment. In practice enabling + * the bus operation will result in children devices become active and + * the master device will only suspend when all its children are no + * longer active. 
+ */ +#define SDW_MASTER_SUSPEND_DELAY_MS 3000 + /* * The sysfs for properties reflects the MIPI description as given * in the MIPI DisCo spec @@ -154,7 +163,12 @@ int sdw_master_device_add(struct sdw_bus *bus, struct device *parent, bus->dev = &md->dev; bus->md = md; + pm_runtime_set_autosuspend_delay(&bus->md->dev, SDW_MASTER_SUSPEND_DELAY_MS); + pm_runtime_use_autosuspend(&bus->md->dev); + pm_runtime_mark_last_busy(&bus->md->dev); + pm_runtime_set_active(&bus->md->dev); pm_runtime_enable(&bus->md->dev); + pm_runtime_idle(&bus->md->dev); device_register_err: return ret; } diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index fbca4ebf63e927..6d22df01f35471 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -799,7 +799,7 @@ static int qcom_swrm_probe(struct platform_device *pdev) data = of_device_get_match_data(dev); ctrl->rows_index = sdw_find_row_index(data->default_rows); ctrl->cols_index = sdw_find_col_index(data->default_cols); -#if IS_ENABLED(CONFIG_SLIMBUS) +#if IS_REACHABLE(CONFIG_SLIMBUS) if (dev->parent->bus == &slimbus_bus) { #else if (false) { diff --git a/drivers/soundwire/sysfs_slave_dpn.c b/drivers/soundwire/sysfs_slave_dpn.c index 05a721ea9830a1..c4b6543c09fd67 100644 --- a/drivers/soundwire/sysfs_slave_dpn.c +++ b/drivers/soundwire/sysfs_slave_dpn.c @@ -37,6 +37,7 @@ static int field##_attribute_alloc(struct device *dev, \ return -ENOMEM; \ dpn_attr->N = N; \ dpn_attr->dir = dir; \ + sysfs_attr_init(&dpn_attr->dev_attr.attr); \ dpn_attr->format_string = format_string; \ dpn_attr->dev_attr.attr.name = __stringify(field); \ dpn_attr->dev_attr.attr.mode = 0444; \ diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 5cff60de8e8349..aadaea052f51d3 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -255,6 +255,8 @@ config SPI_DW_MMIO config SPI_DW_BT1 tristate "Baikal-T1 SPI driver for DW SPI core" depends on MIPS_BAIKAL_T1 || COMPILE_TEST + select MULTIPLEXER + select MUX_MMIO help Baikal-T1 SoC 
is equipped with three DW APB SSI-based MMIO SPI controllers. Two of them are pretty much normal: with IRQ, DMA, @@ -268,8 +270,6 @@ config SPI_DW_BT1 config SPI_DW_BT1_DIRMAP bool "Directly mapped Baikal-T1 Boot SPI flash support" depends on SPI_DW_BT1 - select MULTIPLEXER - select MUX_MMIO help Directly mapped SPI flash memory is an interface specific to the Baikal-T1 System Boot Controller. It is a 16MB MMIO region, which diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index 8c009c175f2c46..1e63fd4821f964 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -365,10 +365,14 @@ static int atmel_qspi_set_cfg(struct atmel_qspi *aq, if (dummy_cycles) ifr |= QSPI_IFR_NBDUM(dummy_cycles); - /* Set data enable */ - if (op->data.nbytes) + /* Set data enable and data transfer type. */ + if (op->data.nbytes) { ifr |= QSPI_IFR_DATAEN; + if (op->addr.nbytes) + ifr |= QSPI_IFR_TFRTYP_MEM; + } + /* * If the QSPI controller is set in regular SPI mode, set it in * Serial Memory Mode (SMM). 
@@ -393,7 +397,7 @@ static int atmel_qspi_set_cfg(struct atmel_qspi *aq, atmel_qspi_write(icr, aq, QSPI_WICR); atmel_qspi_write(ifr, aq, QSPI_IFR); } else { - if (op->data.dir == SPI_MEM_DATA_OUT) + if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT) ifr |= QSPI_IFR_SAMA5D2_WRITE_TRSFR; /* Set QSPI Instruction Frame registers */ @@ -535,7 +539,7 @@ static int atmel_qspi_probe(struct platform_device *pdev) struct resource *res; int irq, err = 0; - ctrl = spi_alloc_master(&pdev->dev, sizeof(*aq)); + ctrl = devm_spi_alloc_master(&pdev->dev, sizeof(*aq)); if (!ctrl) return -ENOMEM; @@ -557,8 +561,7 @@ static int atmel_qspi_probe(struct platform_device *pdev) aq->regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(aq->regs)) { dev_err(&pdev->dev, "missing registers\n"); - err = PTR_ERR(aq->regs); - goto exit; + return PTR_ERR(aq->regs); } /* Map the AHB memory */ @@ -566,8 +569,7 @@ static int atmel_qspi_probe(struct platform_device *pdev) aq->mem = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(aq->mem)) { dev_err(&pdev->dev, "missing AHB memory\n"); - err = PTR_ERR(aq->mem); - goto exit; + return PTR_ERR(aq->mem); } aq->mmap_size = resource_size(res); @@ -579,22 +581,21 @@ static int atmel_qspi_probe(struct platform_device *pdev) if (IS_ERR(aq->pclk)) { dev_err(&pdev->dev, "missing peripheral clock\n"); - err = PTR_ERR(aq->pclk); - goto exit; + return PTR_ERR(aq->pclk); } /* Enable the peripheral clock */ err = clk_prepare_enable(aq->pclk); if (err) { dev_err(&pdev->dev, "failed to enable the peripheral clock\n"); - goto exit; + return err; } aq->caps = of_device_get_match_data(&pdev->dev); if (!aq->caps) { dev_err(&pdev->dev, "Could not retrieve QSPI caps\n"); err = -EINVAL; - goto exit; + goto disable_pclk; } if (aq->caps->has_qspick) { @@ -638,8 +639,6 @@ static int atmel_qspi_probe(struct platform_device *pdev) clk_disable_unprepare(aq->qspick); disable_pclk: clk_disable_unprepare(aq->pclk); -exit: - spi_controller_put(ctrl); return err; } diff 
--git a/drivers/spi/spi-altera.c b/drivers/spi/spi-altera.c index 809bfff3690ab3..62ea0c9e321b4c 100644 --- a/drivers/spi/spi-altera.c +++ b/drivers/spi/spi-altera.c @@ -189,24 +189,26 @@ static int altera_spi_txrx(struct spi_master *master, /* send the first byte */ altera_spi_tx_word(hw); - } else { - while (hw->count < hw->len) { - altera_spi_tx_word(hw); - for (;;) { - altr_spi_readl(hw, ALTERA_SPI_STATUS, &val); - if (val & ALTERA_SPI_STATUS_RRDY_MSK) - break; + return 1; + } + + while (hw->count < hw->len) { + altera_spi_tx_word(hw); - cpu_relax(); - } + for (;;) { + altr_spi_readl(hw, ALTERA_SPI_STATUS, &val); + if (val & ALTERA_SPI_STATUS_RRDY_MSK) + break; - altera_spi_rx_word(hw); + cpu_relax(); } - spi_finalize_current_transfer(master); + + altera_spi_rx_word(hw); } + spi_finalize_current_transfer(master); - return t->len; + return 0; } static irqreturn_t altera_spi_irq(int irq, void *dev) @@ -252,7 +254,8 @@ static int altera_spi_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Invalid number of chipselect: %hu\n", pdata->num_chipselect); - return -EINVAL; + err = -EINVAL; + goto exit; } master->num_chipselect = pdata->num_chipselect; diff --git a/drivers/spi/spi-ar934x.c b/drivers/spi/spi-ar934x.c index d08dec09d423d8..def32e0aaefe30 100644 --- a/drivers/spi/spi-ar934x.c +++ b/drivers/spi/spi-ar934x.c @@ -176,10 +176,11 @@ static int ar934x_spi_probe(struct platform_device *pdev) if (ret) return ret; - ctlr = spi_alloc_master(&pdev->dev, sizeof(*sp)); + ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*sp)); if (!ctlr) { dev_info(&pdev->dev, "failed to allocate spi controller\n"); - return -ENOMEM; + ret = -ENOMEM; + goto err_clk_disable; } /* disable flash mapping and expose spi controller registers */ @@ -202,7 +203,13 @@ static int ar934x_spi_probe(struct platform_device *pdev) sp->clk_freq = clk_get_rate(clk); sp->ctlr = ctlr; - return devm_spi_register_controller(&pdev->dev, ctlr); + ret = spi_register_controller(ctlr); + if (!ret) + return 
0; + +err_clk_disable: + clk_disable_unprepare(clk); + return ret; } static int ar934x_spi_remove(struct platform_device *pdev) @@ -213,6 +220,7 @@ static int ar934x_spi_remove(struct platform_device *pdev) ctlr = dev_get_drvdata(&pdev->dev); sp = spi_controller_get_devdata(ctlr); + spi_unregister_controller(ctlr); clk_disable_unprepare(sp->clk); return 0; diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c index 9909b18f3c5a53..1f08d7553f079c 100644 --- a/drivers/spi/spi-bcm63xx-hsspi.c +++ b/drivers/spi/spi-bcm63xx-hsspi.c @@ -494,8 +494,10 @@ static int bcm63xx_hsspi_resume(struct device *dev) if (bs->pll_clk) { ret = clk_prepare_enable(bs->pll_clk); - if (ret) + if (ret) { + clk_disable_unprepare(bs->clk); return ret; + } } spi_master_resume(master); diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 70467b9d61baa3..a3afd1b9ac567b 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -115,6 +115,7 @@ struct cdns_spi { void __iomem *regs; struct clk *ref_clk; struct clk *pclk; + unsigned int clk_rate; u32 speed_hz; const u8 *txbuf; u8 *rxbuf; @@ -250,7 +251,7 @@ static void cdns_spi_config_clock_freq(struct spi_device *spi, u32 ctrl_reg, baud_rate_val; unsigned long frequency; - frequency = clk_get_rate(xspi->ref_clk); + frequency = xspi->clk_rate; ctrl_reg = cdns_spi_read(xspi, CDNS_SPI_CR); @@ -558,8 +559,9 @@ static int cdns_spi_probe(struct platform_device *pdev) master->auto_runtime_pm = true; master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + xspi->clk_rate = clk_get_rate(xspi->ref_clk); /* Set to default valid value */ - master->max_speed_hz = clk_get_rate(xspi->ref_clk) / 4; + master->max_speed_hz = xspi->clk_rate / 4; xspi->speed_hz = master->max_speed_hz; master->bits_per_word_mask = SPI_BPW_MASK(8); diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index 818f2b22875d2b..7453a1dbbc0614 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ 
-1040,13 +1040,13 @@ static int davinci_spi_remove(struct platform_device *pdev) spi_bitbang_stop(&dspi->bitbang); clk_disable_unprepare(dspi->clk); - spi_master_put(master); if (dspi->dma_rx) { dma_release_channel(dspi->dma_rx); dma_release_channel(dspi->dma_tx); } + spi_master_put(master); return 0; } diff --git a/drivers/spi/spi-dw-bt1.c b/drivers/spi/spi-dw-bt1.c index f382dfad784213..c279b7891e3ac0 100644 --- a/drivers/spi/spi-dw-bt1.c +++ b/drivers/spi/spi-dw-bt1.c @@ -280,8 +280,10 @@ static int dw_spi_bt1_probe(struct platform_device *pdev) dws->bus_num = pdev->id; dws->reg_io_width = 4; dws->max_freq = clk_get_rate(dwsbt1->clk); - if (!dws->max_freq) + if (!dws->max_freq) { + ret = -EINVAL; goto err_disable_clk; + } init_func = device_get_match_data(&pdev->dev); ret = init_func(pdev, dwsbt1); diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 1a08c1d584abe0..0287366874882d 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1165,7 +1165,7 @@ static int dspi_init(struct fsl_dspi *dspi) unsigned int mcr; /* Set idle states for all chip select signals to high */ - mcr = SPI_MCR_PCSIS(GENMASK(dspi->ctlr->num_chipselect - 1, 0)); + mcr = SPI_MCR_PCSIS(GENMASK(dspi->ctlr->max_native_cs - 1, 0)); if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE) mcr |= SPI_MCR_XSPI; @@ -1250,7 +1250,7 @@ static int dspi_probe(struct platform_device *pdev) pdata = dev_get_platdata(&pdev->dev); if (pdata) { - ctlr->num_chipselect = pdata->cs_num; + ctlr->num_chipselect = ctlr->max_native_cs = pdata->cs_num; ctlr->bus_num = pdata->bus_num; /* Only Coldfire uses platform data */ @@ -1263,7 +1263,7 @@ static int dspi_probe(struct platform_device *pdev) dev_err(&pdev->dev, "can't get spi-num-chipselects\n"); goto out_ctlr_put; } - ctlr->num_chipselect = cs_num; + ctlr->num_chipselect = ctlr->max_native_cs = cs_num; of_property_read_u32(np, "bus-num", &bus_num); ctlr->bus_num = bus_num; diff --git a/drivers/spi/spi-fsl-spi.c 
b/drivers/spi/spi-fsl-spi.c index 299e9870cf58d4..6d8e0a05a53554 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -115,14 +115,13 @@ static void fsl_spi_chipselect(struct spi_device *spi, int value) { struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master); struct fsl_spi_platform_data *pdata; - bool pol = spi->mode & SPI_CS_HIGH; struct spi_mpc8xxx_cs *cs = spi->controller_state; pdata = spi->dev.parent->parent->platform_data; if (value == BITBANG_CS_INACTIVE) { if (pdata->cs_control) - pdata->cs_control(spi, !pol); + pdata->cs_control(spi, false); } if (value == BITBANG_CS_ACTIVE) { @@ -134,7 +133,7 @@ static void fsl_spi_chipselect(struct spi_device *spi, int value) fsl_spi_change_mode(spi); if (pdata->cs_control) - pdata->cs_control(spi, pol); + pdata->cs_control(spi, true); } } @@ -716,10 +715,11 @@ static int of_fsl_spi_probe(struct platform_device *ofdev) type = fsl_spi_get_type(&ofdev->dev); if (type == TYPE_FSL) { struct fsl_spi_platform_data *pdata = dev_get_platdata(dev); + bool spisel_boot = false; #if IS_ENABLED(CONFIG_FSL_SOC) struct mpc8xxx_spi_probe_info *pinfo = to_of_pinfo(pdata); - bool spisel_boot = of_property_read_bool(np, "fsl,spisel_boot"); + spisel_boot = of_property_read_bool(np, "fsl,spisel_boot"); if (spisel_boot) { pinfo->immr_spi_cs = ioremap(get_immrbase() + IMMR_SPI_CS_OFFSET, 4); if (!pinfo->immr_spi_cs) @@ -734,10 +734,14 @@ static int of_fsl_spi_probe(struct platform_device *ofdev) * supported on the GRLIB variant. 
*/ ret = gpiod_count(dev, "cs"); - if (ret <= 0) + if (ret < 0) + ret = 0; + if (ret == 0 && !spisel_boot) { pdata->max_chipselect = 1; - else + } else { + pdata->max_chipselect = ret + spisel_boot; pdata->cs_control = fsl_spi_cs_control; + } } ret = of_address_to_resource(np, 0, &mem); diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 25810a7eef1011..01ef79f15b0248 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -83,6 +83,7 @@ struct spi_geni_master { spinlock_t lock; int irq; bool cs_flag; + bool abort_failed; }; static int get_spi_clk_cfg(unsigned int speed_hz, @@ -141,8 +142,49 @@ static void handle_fifo_timeout(struct spi_master *spi, spin_unlock_irq(&mas->lock); time_left = wait_for_completion_timeout(&mas->abort_done, HZ); - if (!time_left) + if (!time_left) { dev_err(mas->dev, "Failed to cancel/abort m_cmd\n"); + + /* + * No need for a lock since SPI core has a lock and we never + * access this from an interrupt. + */ + mas->abort_failed = true; + } +} + +static bool spi_geni_is_abort_still_pending(struct spi_geni_master *mas) +{ + struct geni_se *se = &mas->se; + u32 m_irq, m_irq_en; + + if (!mas->abort_failed) + return false; + + /* + * The only known case where a transfer times out and then a cancel + * times out then an abort times out is if something is blocking our + * interrupt handler from running. Avoid starting any new transfers + * until that sorts itself out. + */ + spin_lock_irq(&mas->lock); + m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS); + m_irq_en = readl(se->base + SE_GENI_M_IRQ_EN); + spin_unlock_irq(&mas->lock); + + if (m_irq & m_irq_en) { + dev_err(mas->dev, "Interrupts pending after abort: %#010x\n", + m_irq & m_irq_en); + return true; + } + + /* + * If we're here the problem resolved itself so no need to check more + * on future transfers. 
+ */ + mas->abort_failed = false; + + return false; } static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) @@ -158,9 +200,15 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) if (set_flag == mas->cs_flag) return; + pm_runtime_get_sync(mas->dev); + + if (spi_geni_is_abort_still_pending(mas)) { + dev_err(mas->dev, "Can't set chip select\n"); + goto exit; + } + mas->cs_flag = set_flag; - pm_runtime_get_sync(mas->dev); spin_lock_irq(&mas->lock); reinit_completion(&mas->cs_done); if (set_flag) @@ -173,6 +221,7 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) if (!time_left) handle_fifo_timeout(spi, NULL); +exit: pm_runtime_put(mas->dev); } @@ -280,6 +329,9 @@ static int spi_geni_prepare_message(struct spi_master *spi, int ret; struct spi_geni_master *mas = spi_master_get_devdata(spi); + if (spi_geni_is_abort_still_pending(mas)) + return -EBUSY; + ret = setup_fifo_params(spi_msg->spi, spi); if (ret) dev_err(mas->dev, "Couldn't select mode %d\n", ret); @@ -354,6 +406,12 @@ static bool geni_spi_handle_tx(struct spi_geni_master *mas) unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas); unsigned int i = 0; + /* Stop the watermark IRQ if nothing to send */ + if (!mas->cur_xfer) { + writel(0, se->base + SE_GENI_TX_WATERMARK_REG); + return false; + } + max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word; if (mas->tx_rem_bytes < max_bytes) max_bytes = mas->tx_rem_bytes; @@ -396,6 +454,14 @@ static void geni_spi_handle_rx(struct spi_geni_master *mas) if (rx_last_byte_valid && rx_last_byte_valid < 4) rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid; } + + /* Clear out the FIFO and bail if nowhere to put it */ + if (!mas->cur_xfer) { + for (i = 0; i < DIV_ROUND_UP(rx_bytes, bytes_per_fifo_word); i++) + readl(se->base + SE_GENI_RX_FIFOn); + return; + } + if (mas->rx_rem_bytes < rx_bytes) rx_bytes = mas->rx_rem_bytes; @@ -495,6 +561,9 @@ static int spi_geni_transfer_one(struct spi_master *spi, { 
struct spi_geni_master *mas = spi_master_get_devdata(spi); + if (spi_geni_is_abort_still_pending(mas)) + return -EBUSY; + /* Terminate and return success for 0 byte length transfer */ if (!xfer->len) return 0; @@ -603,7 +672,7 @@ static int spi_geni_probe(struct platform_device *pdev) if (IS_ERR(clk)) return PTR_ERR(clk); - spi = spi_alloc_master(dev, sizeof(*mas)); + spi = devm_spi_alloc_master(dev, sizeof(*mas)); if (!spi) return -ENOMEM; @@ -673,7 +742,6 @@ static int spi_geni_probe(struct platform_device *pdev) free_irq(mas->irq, spi); spi_geni_probe_runtime_disable: pm_runtime_disable(dev); - spi_master_put(spi); dev_pm_opp_of_remove_table(&pdev->dev); put_clkname: dev_pm_opp_put_clkname(mas->se.opp_table); diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c index 7ceb0ba27b755c..0584f4d2fde29b 100644 --- a/drivers/spi/spi-gpio.c +++ b/drivers/spi/spi-gpio.c @@ -350,11 +350,6 @@ static int spi_gpio_probe_pdata(struct platform_device *pdev, return 0; } -static void spi_gpio_put(void *data) -{ - spi_master_put(data); -} - static int spi_gpio_probe(struct platform_device *pdev) { int status; @@ -363,16 +358,10 @@ static int spi_gpio_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct spi_bitbang *bb; - master = spi_alloc_master(dev, sizeof(*spi_gpio)); + master = devm_spi_alloc_master(dev, sizeof(*spi_gpio)); if (!master) return -ENOMEM; - status = devm_add_action_or_reset(&pdev->dev, spi_gpio_put, master); - if (status) { - spi_master_put(master); - return status; - } - if (pdev->dev.of_node) status = spi_gpio_probe_dt(pdev, master); else @@ -432,7 +421,7 @@ static int spi_gpio_probe(struct platform_device *pdev) if (status) return status; - return devm_spi_register_master(&pdev->dev, spi_master_get(master)); + return devm_spi_register_master(&pdev->dev, master); } MODULE_ALIAS("platform:" DRIVER_NAME); diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index b068537375d609..5f05d519fbbd09 100644 --- 
a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -731,8 +731,10 @@ static int img_spfi_resume(struct device *dev) int ret; ret = pm_runtime_get_sync(dev); - if (ret) + if (ret) { + pm_runtime_put_noidle(dev); return ret; + } spfi_reset(spfi); pm_runtime_put(dev); diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 0b597905ee72c6..8df5e973404f04 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1538,6 +1538,7 @@ spi_imx_prepare_message(struct spi_master *master, struct spi_message *msg) ret = pm_runtime_get_sync(spi_imx->dev); if (ret < 0) { + pm_runtime_put_noidle(spi_imx->dev); dev_err(spi_imx->dev, "failed to enable clock\n"); return ret; } @@ -1748,6 +1749,7 @@ static int spi_imx_remove(struct platform_device *pdev) ret = pm_runtime_get_sync(spi_imx->dev); if (ret < 0) { + pm_runtime_put_noidle(spi_imx->dev); dev_err(spi_imx->dev, "failed to enable clock\n"); return ret; } diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index ef53290b7d24dc..4682f49dc73307 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -243,6 +243,7 @@ static int spi_mem_access_start(struct spi_mem *mem) ret = pm_runtime_get_sync(ctlr->dev.parent); if (ret < 0) { + pm_runtime_put_noidle(ctlr->dev.parent); dev_err(&ctlr->dev, "Failed to power device: %d\n", ret); return ret; diff --git a/drivers/spi/spi-mt7621.c b/drivers/spi/spi-mt7621.c index 2c3b7a2a1ec77a..b4b9b7309b5e94 100644 --- a/drivers/spi/spi-mt7621.c +++ b/drivers/spi/spi-mt7621.c @@ -350,9 +350,10 @@ static int mt7621_spi_probe(struct platform_device *pdev) if (status) return status; - master = spi_alloc_master(&pdev->dev, sizeof(*rs)); + master = devm_spi_alloc_master(&pdev->dev, sizeof(*rs)); if (!master) { dev_info(&pdev->dev, "master allocation failed\n"); + clk_disable_unprepare(clk); return -ENOMEM; } @@ -377,10 +378,15 @@ static int mt7621_spi_probe(struct platform_device *pdev) ret = device_reset(&pdev->dev); if (ret) { dev_err(&pdev->dev, "SPI reset 
failed!\n"); + clk_disable_unprepare(clk); return ret; } - return devm_spi_register_controller(&pdev->dev, master); + ret = spi_register_controller(master); + if (ret) + clk_disable_unprepare(clk); + + return ret; } static int mt7621_spi_remove(struct platform_device *pdev) @@ -391,6 +397,7 @@ static int mt7621_spi_remove(struct platform_device *pdev) master = dev_get_drvdata(&pdev->dev); rs = spi_controller_get_devdata(master); + spi_unregister_controller(master); clk_disable_unprepare(rs->clk); return 0; diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c index b97f26a60cbef4..288f6c2bbd5730 100644 --- a/drivers/spi/spi-mtk-nor.c +++ b/drivers/spi/spi-mtk-nor.c @@ -768,7 +768,7 @@ static int mtk_nor_probe(struct platform_device *pdev) return -EINVAL; } - ctlr = spi_alloc_master(&pdev->dev, sizeof(*sp)); + ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*sp)); if (!ctlr) { dev_err(&pdev->dev, "failed to allocate spi controller\n"); return -ENOMEM; diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c index 8c630acb0110bf..96b418293bf2a4 100644 --- a/drivers/spi/spi-mxic.c +++ b/drivers/spi/spi-mxic.c @@ -529,7 +529,7 @@ static int mxic_spi_probe(struct platform_device *pdev) struct mxic_spi *mxic; int ret; - master = spi_alloc_master(&pdev->dev, sizeof(struct mxic_spi)); + master = devm_spi_alloc_master(&pdev->dev, sizeof(struct mxic_spi)); if (!master) return -ENOMEM; @@ -574,15 +574,9 @@ static int mxic_spi_probe(struct platform_device *pdev) ret = spi_register_master(master); if (ret) { dev_err(&pdev->dev, "spi_register_master failed\n"); - goto err_put_master; + pm_runtime_disable(&pdev->dev); } - return 0; - -err_put_master: - spi_master_put(master); - pm_runtime_disable(&pdev->dev); - return ret; } diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c index 918918a9e04914..435309b09227ea 100644 --- a/drivers/spi/spi-mxs.c +++ b/drivers/spi/spi-mxs.c @@ -607,6 +607,7 @@ static int mxs_spi_probe(struct platform_device *pdev) ret = 
pm_runtime_get_sync(ssp->dev); if (ret < 0) { + pm_runtime_put_noidle(ssp->dev); dev_err(ssp->dev, "runtime_get_sync failed\n"); goto out_pm_runtime_disable; } diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c index 1cb9329de945e1..b62471ab6d7f25 100644 --- a/drivers/spi/spi-npcm-fiu.c +++ b/drivers/spi/spi-npcm-fiu.c @@ -677,7 +677,7 @@ static int npcm_fiu_probe(struct platform_device *pdev) struct npcm_fiu_spi *fiu; void __iomem *regbase; struct resource *res; - int id; + int id, ret; ctrl = devm_spi_alloc_master(dev, sizeof(*fiu)); if (!ctrl) @@ -735,7 +735,11 @@ static int npcm_fiu_probe(struct platform_device *pdev) ctrl->num_chipselect = fiu->info->max_cs; ctrl->dev.of_node = dev->of_node; - return devm_spi_register_master(dev, ctrl); + ret = devm_spi_register_master(dev, ctrl); + if (ret) + clk_disable_unprepare(fiu->clk); + + return ret; } static int npcm_fiu_remove(struct platform_device *pdev) diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c index 156961b4ca86f9..104bde153efd22 100644 --- a/drivers/spi/spi-pic32.c +++ b/drivers/spi/spi-pic32.c @@ -839,6 +839,7 @@ static int pic32_spi_probe(struct platform_device *pdev) return 0; err_bailout: + pic32_spi_dma_unprep(pic32s); clk_disable_unprepare(pic32s->clk); err_master: spi_master_put(master); diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 814268405ab0b8..d6b534d38e5dad 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1686,9 +1686,9 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) } if (platform_info->is_slave) - controller = spi_alloc_slave(dev, sizeof(struct driver_data)); + controller = devm_spi_alloc_slave(dev, sizeof(*drv_data)); else - controller = spi_alloc_master(dev, sizeof(struct driver_data)); + controller = devm_spi_alloc_master(dev, sizeof(*drv_data)); if (!controller) { dev_err(&pdev->dev, "cannot alloc spi_controller\n"); @@ -1911,7 +1911,6 @@ static int pxa2xx_spi_probe(struct platform_device 
*pdev) free_irq(ssp->irq, drv_data); out_error_controller_alloc: - spi_controller_put(controller); pxa_ssp_free(ssp); return status; } diff --git a/drivers/spi/spi-qcom-qspi.c b/drivers/spi/spi-qcom-qspi.c index 5eed88af6899b9..8863be37088456 100644 --- a/drivers/spi/spi-qcom-qspi.c +++ b/drivers/spi/spi-qcom-qspi.c @@ -462,7 +462,7 @@ static int qcom_qspi_probe(struct platform_device *pdev) dev = &pdev->dev; - master = spi_alloc_master(dev, sizeof(*ctrl)); + master = devm_spi_alloc_master(dev, sizeof(*ctrl)); if (!master) return -ENOMEM; @@ -473,54 +473,49 @@ static int qcom_qspi_probe(struct platform_device *pdev) spin_lock_init(&ctrl->lock); ctrl->dev = dev; ctrl->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(ctrl->base)) { - ret = PTR_ERR(ctrl->base); - goto exit_probe_master_put; - } + if (IS_ERR(ctrl->base)) + return PTR_ERR(ctrl->base); ctrl->clks = devm_kcalloc(dev, QSPI_NUM_CLKS, sizeof(*ctrl->clks), GFP_KERNEL); - if (!ctrl->clks) { - ret = -ENOMEM; - goto exit_probe_master_put; - } + if (!ctrl->clks) + return -ENOMEM; ctrl->clks[QSPI_CLK_CORE].id = "core"; ctrl->clks[QSPI_CLK_IFACE].id = "iface"; ret = devm_clk_bulk_get(dev, QSPI_NUM_CLKS, ctrl->clks); if (ret) - goto exit_probe_master_put; + return ret; ctrl->icc_path_cpu_to_qspi = devm_of_icc_get(dev, "qspi-config"); - if (IS_ERR(ctrl->icc_path_cpu_to_qspi)) { - ret = dev_err_probe(dev, PTR_ERR(ctrl->icc_path_cpu_to_qspi), - "Failed to get cpu path\n"); - goto exit_probe_master_put; - } + if (IS_ERR(ctrl->icc_path_cpu_to_qspi)) + return dev_err_probe(dev, PTR_ERR(ctrl->icc_path_cpu_to_qspi), + "Failed to get cpu path\n"); + /* Set BW vote for register access */ ret = icc_set_bw(ctrl->icc_path_cpu_to_qspi, Bps_to_icc(1000), Bps_to_icc(1000)); if (ret) { dev_err(ctrl->dev, "%s: ICC BW voting failed for cpu: %d\n", __func__, ret); - goto exit_probe_master_put; + return ret; } ret = icc_disable(ctrl->icc_path_cpu_to_qspi); if (ret) { dev_err(ctrl->dev, "%s: ICC disable failed for cpu: %d\n", 
__func__, ret); - goto exit_probe_master_put; + return ret; } ret = platform_get_irq(pdev, 0); if (ret < 0) - goto exit_probe_master_put; + return ret; ret = devm_request_irq(dev, ret, qcom_qspi_irq, IRQF_TRIGGER_HIGH, dev_name(dev), ctrl); if (ret) { dev_err(dev, "Failed to request irq %d\n", ret); - goto exit_probe_master_put; + return ret; } master->max_speed_hz = 300000000; @@ -537,10 +532,8 @@ static int qcom_qspi_probe(struct platform_device *pdev) master->auto_runtime_pm = true; ctrl->opp_table = dev_pm_opp_set_clkname(&pdev->dev, "core"); - if (IS_ERR(ctrl->opp_table)) { - ret = PTR_ERR(ctrl->opp_table); - goto exit_probe_master_put; - } + if (IS_ERR(ctrl->opp_table)) + return PTR_ERR(ctrl->opp_table); /* OPP table is optional */ ret = dev_pm_opp_of_add_table(&pdev->dev); if (ret && ret != -ENODEV) { @@ -562,9 +555,6 @@ static int qcom_qspi_probe(struct platform_device *pdev) exit_probe_put_clkname: dev_pm_opp_put_clkname(ctrl->opp_table); -exit_probe_master_put: - spi_master_put(master); - return ret; } diff --git a/drivers/spi/spi-rb4xx.c b/drivers/spi/spi-rb4xx.c index 8aa51beb4ff3eb..9f97d18a05c102 100644 --- a/drivers/spi/spi-rb4xx.c +++ b/drivers/spi/spi-rb4xx.c @@ -143,7 +143,7 @@ static int rb4xx_spi_probe(struct platform_device *pdev) if (IS_ERR(spi_base)) return PTR_ERR(spi_base); - master = spi_alloc_master(&pdev->dev, sizeof(*rbspi)); + master = devm_spi_alloc_master(&pdev->dev, sizeof(*rbspi)); if (!master) return -ENOMEM; diff --git a/drivers/spi/spi-rpc-if.c b/drivers/spi/spi-rpc-if.c index ed3e548227f474..3579675485a5ef 100644 --- a/drivers/spi/spi-rpc-if.c +++ b/drivers/spi/spi-rpc-if.c @@ -134,7 +134,7 @@ static int rpcif_spi_probe(struct platform_device *pdev) struct rpcif *rpc; int error; - ctlr = spi_alloc_master(&pdev->dev, sizeof(*rpc)); + ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*rpc)); if (!ctlr) return -ENOMEM; @@ -159,13 +159,8 @@ static int rpcif_spi_probe(struct platform_device *pdev) error = 
spi_register_controller(ctlr); if (error) { dev_err(&pdev->dev, "spi_register_controller failed\n"); - goto err_put_ctlr; + rpcif_disable_rpm(rpc); } - return 0; - -err_put_ctlr: - rpcif_disable_rpm(rpc); - spi_controller_put(ctlr); return error; } diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c index ee0f3edf49cdbd..297c512069a578 100644 --- a/drivers/spi/spi-sc18is602.c +++ b/drivers/spi/spi-sc18is602.c @@ -238,13 +238,12 @@ static int sc18is602_probe(struct i2c_client *client, struct sc18is602_platform_data *pdata = dev_get_platdata(dev); struct sc18is602 *hw; struct spi_master *master; - int error; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C | I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) return -EINVAL; - master = spi_alloc_master(dev, sizeof(struct sc18is602)); + master = devm_spi_alloc_master(dev, sizeof(struct sc18is602)); if (!master) return -ENOMEM; @@ -298,15 +297,7 @@ static int sc18is602_probe(struct i2c_client *client, master->min_speed_hz = hw->freq / 128; master->max_speed_hz = hw->freq / 4; - error = devm_spi_register_master(dev, master); - if (error) - goto error_reg; - - return 0; - -error_reg: - spi_master_put(master); - return error; + return devm_spi_register_master(dev, master); } static const struct i2c_device_id sc18is602_id[] = { diff --git a/drivers/spi/spi-sh.c b/drivers/spi/spi-sh.c index 20bdae5fdf3b82..15123a8f41e1e7 100644 --- a/drivers/spi/spi-sh.c +++ b/drivers/spi/spi-sh.c @@ -440,7 +440,7 @@ static int spi_sh_probe(struct platform_device *pdev) if (irq < 0) return irq; - master = spi_alloc_master(&pdev->dev, sizeof(struct spi_sh_data)); + master = devm_spi_alloc_master(&pdev->dev, sizeof(struct spi_sh_data)); if (master == NULL) { dev_err(&pdev->dev, "spi_alloc_master error.\n"); return -ENOMEM; @@ -458,16 +458,14 @@ static int spi_sh_probe(struct platform_device *pdev) break; default: dev_err(&pdev->dev, "No support width\n"); - ret = -ENODEV; - goto error1; + return -ENODEV; } ss->irq = irq; ss->master 
= master; ss->addr = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (ss->addr == NULL) { dev_err(&pdev->dev, "ioremap error.\n"); - ret = -ENOMEM; - goto error1; + return -ENOMEM; } INIT_LIST_HEAD(&ss->queue); spin_lock_init(&ss->lock); @@ -477,7 +475,7 @@ static int spi_sh_probe(struct platform_device *pdev) ret = request_irq(irq, spi_sh_irq, 0, "spi_sh", ss); if (ret < 0) { dev_err(&pdev->dev, "request_irq error\n"); - goto error1; + return ret; } master->num_chipselect = 2; @@ -496,9 +494,6 @@ static int spi_sh_probe(struct platform_device *pdev) error3: free_irq(irq, ss); - error1: - spi_master_put(master); - return ret; } diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c index 635738f54c7318..b41a75749b498e 100644 --- a/drivers/spi/spi-sprd.c +++ b/drivers/spi/spi-sprd.c @@ -1010,6 +1010,7 @@ static int sprd_spi_remove(struct platform_device *pdev) ret = pm_runtime_get_sync(ss->dev); if (ret < 0) { + pm_runtime_put_noidle(ss->dev); dev_err(ss->dev, "failed to resume SPI controller\n"); return ret; } diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c index 77d26d64541a5a..6c44dda9ee8c59 100644 --- a/drivers/spi/spi-st-ssc4.c +++ b/drivers/spi/spi-st-ssc4.c @@ -375,13 +375,14 @@ static int spi_st_probe(struct platform_device *pdev) ret = devm_spi_register_master(&pdev->dev, master); if (ret) { dev_err(&pdev->dev, "Failed to register master\n"); - goto clk_disable; + goto rpm_disable; } return 0; -clk_disable: +rpm_disable: pm_runtime_disable(&pdev->dev); +clk_disable: clk_disable_unprepare(spi_st->clk); put_master: spi_master_put(master); diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index a900962b4336ea..947e6b9dc9f4d5 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -434,8 +434,10 @@ static int stm32_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op) int ret; ret = pm_runtime_get_sync(qspi->dev); - if (ret < 0) + if (ret < 0) { + 
pm_runtime_put_noidle(qspi->dev); return ret; + } mutex_lock(&qspi->lock); ret = stm32_qspi_send(mem, op); @@ -462,8 +464,10 @@ static int stm32_qspi_setup(struct spi_device *spi) return -EINVAL; ret = pm_runtime_get_sync(qspi->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(qspi->dev); return ret; + } presc = DIV_ROUND_UP(qspi->clk_rate, spi->max_speed_hz) - 1; diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 2cc850eb8922da..6017209c6d2f7b 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -493,9 +493,9 @@ static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi, u32 xfer_len) /* align packet size with data registers access */ if (spi->cur_bpw > 8) - fthlv -= (fthlv % 2); /* multiple of 2 */ + fthlv += (fthlv % 2) ? 1 : 0; else - fthlv -= (fthlv % 4); /* multiple of 4 */ + fthlv += (fthlv % 4) ? (4 - (fthlv % 4)) : 0; if (!fthlv) fthlv = 1; @@ -2062,6 +2062,7 @@ static int stm32_spi_resume(struct device *dev) ret = pm_runtime_get_sync(dev); if (ret < 0) { + pm_runtime_put_noidle(dev); dev_err(dev, "Unable to power device:%d\n", ret); return ret; } diff --git a/drivers/spi/spi-synquacer.c b/drivers/spi/spi-synquacer.c index 42e82dbe3d4102..8cdca6ab809894 100644 --- a/drivers/spi/spi-synquacer.c +++ b/drivers/spi/spi-synquacer.c @@ -657,7 +657,8 @@ static int synquacer_spi_probe(struct platform_device *pdev) if (!master->max_speed_hz) { dev_err(&pdev->dev, "missing clock source\n"); - return -EINVAL; + ret = -EINVAL; + goto disable_clk; } master->min_speed_hz = master->max_speed_hz / 254; @@ -670,7 +671,7 @@ static int synquacer_spi_probe(struct platform_device *pdev) rx_irq = platform_get_irq(pdev, 0); if (rx_irq <= 0) { ret = rx_irq; - goto put_spi; + goto disable_clk; } snprintf(sspi->rx_irq_name, SYNQUACER_HSSPI_IRQ_NAME_MAX, "%s-rx", dev_name(&pdev->dev)); @@ -678,13 +679,13 @@ static int synquacer_spi_probe(struct platform_device *pdev) 0, sspi->rx_irq_name, sspi); if (ret) { dev_err(&pdev->dev, "request 
rx_irq failed (%d)\n", ret); - goto put_spi; + goto disable_clk; } tx_irq = platform_get_irq(pdev, 1); if (tx_irq <= 0) { ret = tx_irq; - goto put_spi; + goto disable_clk; } snprintf(sspi->tx_irq_name, SYNQUACER_HSSPI_IRQ_NAME_MAX, "%s-tx", dev_name(&pdev->dev)); @@ -692,7 +693,7 @@ static int synquacer_spi_probe(struct platform_device *pdev) 0, sspi->tx_irq_name, sspi); if (ret) { dev_err(&pdev->dev, "request tx_irq failed (%d)\n", ret); - goto put_spi; + goto disable_clk; } master->dev.of_node = np; @@ -710,7 +711,7 @@ static int synquacer_spi_probe(struct platform_device *pdev) ret = synquacer_spi_enable(master); if (ret) - goto fail_enable; + goto disable_clk; pm_runtime_set_active(sspi->dev); pm_runtime_enable(sspi->dev); @@ -723,7 +724,7 @@ static int synquacer_spi_probe(struct platform_device *pdev) disable_pm: pm_runtime_disable(sspi->dev); -fail_enable: +disable_clk: clk_disable_unprepare(sspi->clk); put_spi: spi_master_put(master); diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index ca6886aaa51970..a2e5907276e7fd 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -966,6 +966,7 @@ static int tegra_spi_setup(struct spi_device *spi) ret = pm_runtime_get_sync(tspi->dev); if (ret < 0) { + pm_runtime_put_noidle(tspi->dev); dev_err(tspi->dev, "pm runtime failed, e = %d\n", ret); if (cdata) tegra_spi_cleanup(spi); @@ -1474,6 +1475,7 @@ static int tegra_spi_resume(struct device *dev) ret = pm_runtime_get_sync(dev); if (ret < 0) { + pm_runtime_put_noidle(dev); dev_err(dev, "pm runtime failed, e = %d\n", ret); return ret; } diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c index b59015c7c8a804..cfb7de73793763 100644 --- a/drivers/spi/spi-tegra20-sflash.c +++ b/drivers/spi/spi-tegra20-sflash.c @@ -552,6 +552,7 @@ static int tegra_sflash_resume(struct device *dev) ret = pm_runtime_get_sync(dev); if (ret < 0) { + pm_runtime_put_noidle(dev); dev_err(dev, "pm runtime failed, e = %d\n", ret); 
return ret; } diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index a0810765d4e525..f7c832fd40036c 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -751,6 +751,7 @@ static int tegra_slink_setup(struct spi_device *spi) ret = pm_runtime_get_sync(tspi->dev); if (ret < 0) { + pm_runtime_put_noidle(tspi->dev); dev_err(tspi->dev, "pm runtime failed, e = %d\n", ret); return ret; } @@ -1188,6 +1189,7 @@ static int tegra_slink_resume(struct device *dev) ret = pm_runtime_get_sync(dev); if (ret < 0) { + pm_runtime_put_noidle(dev); dev_err(dev, "pm runtime failed, e = %d\n", ret); return ret; } diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index 3c41649698a5b0..9417385c092175 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -174,6 +174,7 @@ static int ti_qspi_setup(struct spi_device *spi) ret = pm_runtime_get_sync(qspi->dev); if (ret < 0) { + pm_runtime_put_noidle(qspi->dev); dev_err(qspi->dev, "pm_runtime_get_sync() failed\n"); return ret; } diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index fc9a59788d2eab..7694e1ae5b0b2f 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -405,9 +405,11 @@ static int spi_drv_probe(struct device *dev) if (ret) return ret; - ret = sdrv->probe(spi); - if (ret) - dev_pm_domain_detach(dev, true); + if (sdrv->probe) { + ret = sdrv->probe(spi); + if (ret) + dev_pm_domain_detach(dev, true); + } return ret; } @@ -415,9 +417,10 @@ static int spi_drv_probe(struct device *dev) static int spi_drv_remove(struct device *dev) { const struct spi_driver *sdrv = to_spi_driver(dev->driver); - int ret; + int ret = 0; - ret = sdrv->remove(to_spi_device(dev)); + if (sdrv->remove) + ret = sdrv->remove(to_spi_device(dev)); dev_pm_domain_detach(dev, true); return ret; @@ -442,10 +445,8 @@ int __spi_register_driver(struct module *owner, struct spi_driver *sdrv) { sdrv->driver.owner = owner; sdrv->driver.bus = &spi_bus_type; - if (sdrv->probe) - 
sdrv->driver.probe = spi_drv_probe; - if (sdrv->remove) - sdrv->driver.remove = spi_drv_remove; + sdrv->driver.probe = spi_drv_probe; + sdrv->driver.remove = spi_drv_remove; if (sdrv->shutdown) sdrv->driver.shutdown = spi_drv_shutdown; return driver_register(&sdrv->driver); @@ -1099,6 +1100,7 @@ static int spi_transfer_wait(struct spi_controller *ctlr, { struct spi_statistics *statm = &ctlr->statistics; struct spi_statistics *stats = &msg->spi->statistics; + u32 speed_hz = xfer->speed_hz; unsigned long long ms; if (spi_controller_is_slave(ctlr)) { @@ -1107,8 +1109,11 @@ static int spi_transfer_wait(struct spi_controller *ctlr, return -EINTR; } } else { + if (!speed_hz) + speed_hz = 100000; + ms = 8LL * 1000LL * xfer->len; - do_div(ms, xfer->speed_hz); + do_div(ms, speed_hz); ms += ms + 200; /* some tolerance */ if (ms > UINT_MAX) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index d99231c737fbfb..80d74cce2a0105 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2987,7 +2987,9 @@ static int put_compat_cmd(struct comedi32_cmd_struct __user *cmd32, v32.chanlist_len = cmd->chanlist_len; v32.data = ptr_to_compat(cmd->data); v32.data_len = cmd->data_len; - return copy_to_user(cmd32, &v32, sizeof(v32)); + if (copy_to_user(cmd32, &v32, sizeof(v32))) + return -EFAULT; + return 0; } /* Handle 32-bit COMEDI_CMD ioctl. */ diff --git a/drivers/staging/comedi/drivers/mf6x4.c b/drivers/staging/comedi/drivers/mf6x4.c index ea430237efa7f6..9da8dd748078de 100644 --- a/drivers/staging/comedi/drivers/mf6x4.c +++ b/drivers/staging/comedi/drivers/mf6x4.c @@ -112,8 +112,9 @@ static int mf6x4_ai_eoc(struct comedi_device *dev, struct mf6x4_private *devpriv = dev->private; unsigned int status; + /* EOLC goes low at end of conversion. 
*/ status = ioread32(devpriv->gpioc_reg); - if (status & MF6X4_GPIOC_EOLC) + if ((status & MF6X4_GPIOC_EOLC) == 0) return 0; return -EBUSY; } diff --git a/drivers/staging/gasket/gasket_interrupt.c b/drivers/staging/gasket/gasket_interrupt.c index 2d6195f7300e91..864342acfd86e8 100644 --- a/drivers/staging/gasket/gasket_interrupt.c +++ b/drivers/staging/gasket/gasket_interrupt.c @@ -487,14 +487,16 @@ int gasket_interrupt_system_status(struct gasket_dev *gasket_dev) int gasket_interrupt_set_eventfd(struct gasket_interrupt_data *interrupt_data, int interrupt, int event_fd) { - struct eventfd_ctx *ctx = eventfd_ctx_fdget(event_fd); - - if (IS_ERR(ctx)) - return PTR_ERR(ctx); + struct eventfd_ctx *ctx; if (interrupt < 0 || interrupt >= interrupt_data->num_interrupts) return -EINVAL; + ctx = eventfd_ctx_fdget(event_fd); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + interrupt_data->eventfd_ctxs[interrupt] = ctx; return 0; } @@ -505,6 +507,9 @@ int gasket_interrupt_clear_eventfd(struct gasket_interrupt_data *interrupt_data, if (interrupt < 0 || interrupt >= interrupt_data->num_interrupts) return -EINVAL; - interrupt_data->eventfd_ctxs[interrupt] = NULL; + if (interrupt_data->eventfd_ctxs[interrupt]) { + eventfd_ctx_put(interrupt_data->eventfd_ctxs[interrupt]); + interrupt_data->eventfd_ctxs[interrupt] = NULL; + } return 0; } diff --git a/drivers/staging/greybus/audio_codec.c b/drivers/staging/greybus/audio_codec.c index 494aa823e99849..42ce6c88ea7531 100644 --- a/drivers/staging/greybus/audio_codec.c +++ b/drivers/staging/greybus/audio_codec.c @@ -490,6 +490,7 @@ static int gbcodec_hw_params(struct snd_pcm_substream *substream, if (ret) { dev_err_ratelimited(dai->dev, "%d: Error during set_config\n", ret); + gb_pm_runtime_put_noidle(bundle); mutex_unlock(&codec->lock); return ret; } @@ -566,6 +567,7 @@ static int gbcodec_prepare(struct snd_pcm_substream *substream, break; } if (ret) { + gb_pm_runtime_put_noidle(bundle); mutex_unlock(&codec->lock); 
dev_err_ratelimited(dai->dev, "set_data_size failed:%d\n", ret); diff --git a/drivers/staging/greybus/audio_helper.c b/drivers/staging/greybus/audio_helper.c index 237531ba60f30e..3011b8abce389d 100644 --- a/drivers/staging/greybus/audio_helper.c +++ b/drivers/staging/greybus/audio_helper.c @@ -135,7 +135,8 @@ int gbaudio_dapm_free_controls(struct snd_soc_dapm_context *dapm, if (!w) { dev_err(dapm->dev, "%s: widget not found\n", widget->name); - return -EINVAL; + widget++; + continue; } widget++; #ifdef CONFIG_DEBUG_FS diff --git a/drivers/staging/hikey9xx/hi6421-spmi-pmic.c b/drivers/staging/hikey9xx/hi6421-spmi-pmic.c index 64b30d263c8d08..4f34a528297002 100644 --- a/drivers/staging/hikey9xx/hi6421-spmi-pmic.c +++ b/drivers/staging/hikey9xx/hi6421-spmi-pmic.c @@ -262,8 +262,10 @@ static int hi6421_spmi_pmic_probe(struct spmi_device *pdev) hi6421_spmi_pmic_irq_prc(pmic); pmic->irqs = devm_kzalloc(dev, HISI_IRQ_NUM * sizeof(int), GFP_KERNEL); - if (!pmic->irqs) + if (!pmic->irqs) { + ret = -ENOMEM; goto irq_malloc; + } pmic->domain = irq_domain_add_simple(np, HISI_IRQ_NUM, 0, &hi6421_spmi_domain_ops, pmic); diff --git a/drivers/staging/hikey9xx/hisi-spmi-controller.c b/drivers/staging/hikey9xx/hisi-spmi-controller.c index f831c43f4783f1..29f226503668d2 100644 --- a/drivers/staging/hikey9xx/hisi-spmi-controller.c +++ b/drivers/staging/hikey9xx/hisi-spmi-controller.c @@ -278,21 +278,24 @@ static int spmi_controller_probe(struct platform_device *pdev) iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!iores) { dev_err(&pdev->dev, "can not get resource!\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_controller; } spmi_controller->base = devm_ioremap(&pdev->dev, iores->start, resource_size(iores)); if (!spmi_controller->base) { dev_err(&pdev->dev, "can not remap base addr!\n"); - return -EADDRNOTAVAIL; + ret = -EADDRNOTAVAIL; + goto err_put_controller; } ret = of_property_read_u32(pdev->dev.of_node, "spmi-channel", &spmi_controller->channel); if (ret) 
{ dev_err(&pdev->dev, "can not get channel\n"); - return -ENODEV; + ret = -ENODEV; + goto err_put_controller; } platform_set_drvdata(pdev, spmi_controller); @@ -309,9 +312,15 @@ static int spmi_controller_probe(struct platform_device *pdev) ctrl->write_cmd = spmi_write_cmd; ret = spmi_controller_add(ctrl); - if (ret) - dev_err(&pdev->dev, "spmi_add_controller failed with error %d!\n", ret); + if (ret) { + dev_err(&pdev->dev, "spmi_controller_add failed with error %d!\n", ret); + goto err_put_controller; + } + + return 0; +err_put_controller: + spmi_controller_put(ctrl); return ret; } @@ -320,7 +329,7 @@ static int spmi_del_controller(struct platform_device *pdev) struct spmi_controller *ctrl = platform_get_drvdata(pdev); spmi_controller_remove(ctrl); - kfree(ctrl); + spmi_controller_put(ctrl); return 0; } diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c index b668a82d40ad46..f5fbdbc4ffdb18 100644 --- a/drivers/staging/media/hantro/hantro_v4l2.c +++ b/drivers/staging/media/hantro/hantro_v4l2.c @@ -367,7 +367,7 @@ hantro_reset_raw_fmt(struct hantro_ctx *ctx) hantro_reset_fmt(raw_fmt, raw_vpu_fmt); raw_fmt->width = encoded_fmt->width; - raw_fmt->width = encoded_fmt->width; + raw_fmt->height = encoded_fmt->height; if (ctx->is_encoder) hantro_set_fmt_out(ctx, raw_fmt); else diff --git a/drivers/staging/media/rkisp1/rkisp1-capture.c b/drivers/staging/media/rkisp1/rkisp1-capture.c index b6f497ce3e95c3..0c934ca5adaa35 100644 --- a/drivers/staging/media/rkisp1/rkisp1-capture.c +++ b/drivers/staging/media/rkisp1/rkisp1-capture.c @@ -992,6 +992,7 @@ rkisp1_vb2_start_streaming(struct vb2_queue *queue, unsigned int count) ret = pm_runtime_get_sync(cap->rkisp1->dev); if (ret < 0) { + pm_runtime_put_noidle(cap->rkisp1->dev); dev_err(cap->rkisp1->dev, "power up failed %d\n", ret); goto err_destroy_dummy; } diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c index 
781c84a9b1b79e..de7442d4834dca 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c @@ -203,7 +203,7 @@ static void _cedrus_write_ref_list(struct cedrus_ctx *ctx, position = cedrus_buf->codec.h264.position; sram_array[i] |= position << 1; - if (ref_list[i].fields & V4L2_H264_BOTTOM_FIELD_REF) + if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF) sram_array[i] |= BIT(0); } diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c b/drivers/staging/media/sunxi/cedrus/cedrus_video.c index 667b86dde1ee88..911f607d9b092f 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c @@ -479,8 +479,10 @@ static int cedrus_start_streaming(struct vb2_queue *vq, unsigned int count) if (V4L2_TYPE_IS_OUTPUT(vq->type)) { ret = pm_runtime_get_sync(dev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(dev->dev); goto err_cleanup; + } if (dev->dec_ops[ctx->current_codec]->start) { ret = dev->dec_ops[ctx->current_codec]->start(ctx); diff --git a/drivers/staging/mt7621-dma/mtk-hsdma.c b/drivers/staging/mt7621-dma/mtk-hsdma.c index 354536783e1ce6..5ad55ca6202294 100644 --- a/drivers/staging/mt7621-dma/mtk-hsdma.c +++ b/drivers/staging/mt7621-dma/mtk-hsdma.c @@ -712,7 +712,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev) ret = dma_async_device_register(dd); if (ret) { dev_err(&pdev->dev, "failed to register dma device\n"); - return ret; + goto err_uninit_hsdma; } ret = of_dma_controller_register(pdev->dev.of_node, @@ -728,6 +728,8 @@ static int mtk_hsdma_probe(struct platform_device *pdev) err_unregister: dma_async_device_unregister(dd); +err_uninit_hsdma: + mtk_hsdma_uninit(hsdma); return ret; } diff --git a/drivers/staging/vc04_services/vchiq-mmal/Kconfig b/drivers/staging/vc04_services/vchiq-mmal/Kconfig index 500c0d12e4ff2f..c99525a0bb4525 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/Kconfig +++ 
b/drivers/staging/vc04_services/vchiq-mmal/Kconfig @@ -1,6 +1,6 @@ config BCM2835_VCHIQ_MMAL tristate "BCM2835 MMAL VCHIQ service" - depends on (ARCH_BCM2835 || COMPILE_TEST) + depends on BCM2835_VCHIQ help Enables the MMAL API over VCHIQ interface as used for the majority of the multimedia services on VideoCore. diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 590e6d07222810..7d5814a95e1ed0 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -562,8 +562,6 @@ tcmu_get_block_page(struct tcmu_dev *udev, uint32_t dbi) static inline void tcmu_free_cmd(struct tcmu_cmd *tcmu_cmd) { - if (tcmu_cmd->se_cmd) - tcmu_cmd->se_cmd->priv = NULL; kfree(tcmu_cmd->dbi); kmem_cache_free(tcmu_cmd_cache, tcmu_cmd); } @@ -1188,11 +1186,12 @@ tcmu_queue_cmd(struct se_cmd *se_cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; mutex_lock(&udev->cmdr_lock); - se_cmd->priv = tcmu_cmd; if (!(se_cmd->transport_state & CMD_T_ABORTED)) ret = queue_cmd_ring(tcmu_cmd, &scsi_ret); if (ret < 0) tcmu_free_cmd(tcmu_cmd); + else + se_cmd->priv = tcmu_cmd; mutex_unlock(&udev->cmdr_lock); return scsi_ret; } @@ -1255,6 +1254,7 @@ tcmu_tmr_notify(struct se_device *se_dev, enum tcm_tmreq_table tmf, list_del_init(&cmd->queue_entry); tcmu_free_cmd(cmd); + se_cmd->priv = NULL; target_complete_cmd(se_cmd, SAM_STAT_TASK_ABORTED); unqueued = true; } @@ -1346,6 +1346,7 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * } done: + se_cmd->priv = NULL; if (read_len_valid) { pr_debug("read_len = %d\n", read_len); target_complete_cmd_with_length(cmd->se_cmd, @@ -1492,6 +1493,7 @@ static void tcmu_check_expired_queue_cmd(struct tcmu_cmd *cmd) se_cmd = cmd->se_cmd; tcmu_free_cmd(cmd); + se_cmd->priv = NULL; target_complete_cmd(se_cmd, SAM_STAT_TASK_SET_FULL); } @@ -1606,6 +1608,7 @@ static void run_qfull_queue(struct tcmu_dev *udev, bool fail) * removed then LIO core will do the right thing and * fail the 
retry. */ + tcmu_cmd->se_cmd->priv = NULL; target_complete_cmd(tcmu_cmd->se_cmd, SAM_STAT_BUSY); tcmu_free_cmd(tcmu_cmd); continue; @@ -1619,6 +1622,7 @@ static void run_qfull_queue(struct tcmu_dev *udev, bool fail) * Ignore scsi_ret for now. target_complete_cmd * drops it. */ + tcmu_cmd->se_cmd->priv = NULL; target_complete_cmd(tcmu_cmd->se_cmd, SAM_STAT_CHECK_CONDITION); tcmu_free_cmd(tcmu_cmd); @@ -2226,6 +2230,7 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) if (!test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { WARN_ON(!cmd->se_cmd); list_del_init(&cmd->queue_entry); + cmd->se_cmd->priv = NULL; if (err_level == 1) { /* * Userspace was not able to start the diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 44e15d7fb2f09e..66d6f1d06f219d 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -46,60 +46,83 @@ static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf) return 0; } -struct xcopy_dev_search_info { - const unsigned char *dev_wwn; - struct se_device *found_dev; -}; - +/** + * target_xcopy_locate_se_dev_e4_iter - compare XCOPY NAA device identifiers + * + * @se_dev: device being considered for match + * @dev_wwn: XCOPY requested NAA dev_wwn + * @return: 1 on match, 0 on no-match + */ static int target_xcopy_locate_se_dev_e4_iter(struct se_device *se_dev, - void *data) + const unsigned char *dev_wwn) { - struct xcopy_dev_search_info *info = data; unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; int rc; - if (!se_dev->dev_attrib.emulate_3pc) + if (!se_dev->dev_attrib.emulate_3pc) { + pr_debug("XCOPY: emulate_3pc disabled on se_dev %p\n", se_dev); return 0; + } memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); - rc = memcmp(&tmp_dev_wwn[0], info->dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); - if (rc != 0) - return 0; - - info->found_dev = se_dev; - pr_debug("XCOPY 0xe4: located se_dev: 
%p\n", se_dev); - - rc = target_depend_item(&se_dev->dev_group.cg_item); + rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); if (rc != 0) { - pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n", - rc, se_dev); - return rc; + pr_debug("XCOPY: skip non-matching: %*ph\n", + XCOPY_NAA_IEEE_REGEX_LEN, tmp_dev_wwn); + return 0; } + pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); - pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n", - se_dev, &se_dev->dev_group); return 1; } -static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, - struct se_device **found_dev) +static int target_xcopy_locate_se_dev_e4(struct se_session *sess, + const unsigned char *dev_wwn, + struct se_device **_found_dev, + struct percpu_ref **_found_lun_ref) { - struct xcopy_dev_search_info info; - int ret; - - memset(&info, 0, sizeof(info)); - info.dev_wwn = dev_wwn; - - ret = target_for_each_device(target_xcopy_locate_se_dev_e4_iter, &info); - if (ret == 1) { - *found_dev = info.found_dev; - return 0; - } else { - pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); - return -EINVAL; + struct se_dev_entry *deve; + struct se_node_acl *nacl; + struct se_lun *this_lun = NULL; + struct se_device *found_dev = NULL; + + /* cmd with NULL sess indicates no associated $FABRIC_MOD */ + if (!sess) + goto err_out; + + pr_debug("XCOPY 0xe4: searching for: %*ph\n", + XCOPY_NAA_IEEE_REGEX_LEN, dev_wwn); + + nacl = sess->se_node_acl; + rcu_read_lock(); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { + struct se_device *this_dev; + int rc; + + this_lun = rcu_dereference(deve->se_lun); + this_dev = rcu_dereference_raw(this_lun->lun_se_dev); + + rc = target_xcopy_locate_se_dev_e4_iter(this_dev, dev_wwn); + if (rc) { + if (percpu_ref_tryget_live(&this_lun->lun_ref)) + found_dev = this_dev; + break; + } } + rcu_read_unlock(); + if (found_dev == NULL) + goto err_out; + + pr_debug("lun_ref held for 
se_dev: %p se_dev->se_dev_group: %p\n", + found_dev, &found_dev->dev_group); + *_found_dev = found_dev; + *_found_lun_ref = &this_lun->lun_ref; + return 0; +err_out: + pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); + return -EINVAL; } static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, @@ -246,12 +269,16 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, switch (xop->op_origin) { case XCOL_SOURCE_RECV_OP: - rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn, - &xop->dst_dev); + rc = target_xcopy_locate_se_dev_e4(se_cmd->se_sess, + xop->dst_tid_wwn, + &xop->dst_dev, + &xop->remote_lun_ref); break; case XCOL_DEST_RECV_OP: - rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn, - &xop->src_dev); + rc = target_xcopy_locate_se_dev_e4(se_cmd->se_sess, + xop->src_tid_wwn, + &xop->src_dev, + &xop->remote_lun_ref); break; default: pr_err("XCOPY CSCD descriptor IDs not found in CSCD list - " @@ -391,18 +418,12 @@ static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) { - struct se_device *remote_dev; - if (xop->op_origin == XCOL_SOURCE_RECV_OP) - remote_dev = xop->dst_dev; + pr_debug("putting dst lun_ref for %p\n", xop->dst_dev); else - remote_dev = xop->src_dev; - - pr_debug("Calling configfs_undepend_item for" - " remote_dev: %p remote_dev->dev_group: %p\n", - remote_dev, &remote_dev->dev_group.cg_item); + pr_debug("putting src lun_ref for %p\n", xop->src_dev); - target_undepend_item(&remote_dev->dev_group.cg_item); + percpu_ref_put(xop->remote_lun_ref); } static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index c56a1bde9417bc..e5f20005179a86 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -27,6 +27,7 @@ struct xcopy_op { struct se_device *dst_dev; unsigned char 
dst_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; unsigned char local_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + struct percpu_ref *remote_lun_ref; sector_t src_lba; sector_t dst_lba; diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index c981757ba0d405..780d7c4fd75653 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -148,7 +149,8 @@ u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg) */ optee_cq_wait_for_completion(&optee->call_queue, &w); } else if (OPTEE_SMC_RETURN_IS_RPC(res.a0)) { - might_sleep(); + if (need_resched()) + cond_resched(); param.a0 = res.a0; param.a1 = res.a1; param.a2 = res.a2; diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index cc2959f22f01a9..612f063c1cfcd4 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -438,13 +438,11 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, if (cpufreq_cdev->cpufreq_state == state) return 0; - cpufreq_cdev->cpufreq_state = state; - frequency = get_state_freq(cpufreq_cdev, state); ret = freq_qos_update_request(&cpufreq_cdev->qos_req, frequency); - if (ret > 0) { + cpufreq_cdev->cpufreq_state = state; cpus = cpufreq_cdev->policy->cpus; max_capacity = arch_scale_cpu_capacity(cpumask_first(cpus)); capacity = frequency * max_capacity; diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 7e5e3631526076..c2869489ba681c 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2079,9 +2079,6 @@ static int canon_copy_from_read_buf(struct tty_struct *tty, return 0; } -extern ssize_t redirected_tty_write(struct file *, const char __user *, - size_t, loff_t *); - /** * job_control - check job control * @tty: tty @@ -2103,7 +2100,7 @@ static int job_control(struct tty_struct *tty, struct file *file) /* NOTE: not yet done after every sleep pending a thorough check of the logic of this change. 
-- jlc */ /* don't stop on /dev/console */ - if (file->f_op->write == redirected_tty_write) + if (file->f_op->write_iter == redirected_tty_write) return 0; return __tty_check_change(tty, SIGTTIN); @@ -2307,7 +2304,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, ssize_t retval = 0; /* Job control check -- must be done at start (POSIX.1 7.1.1.4). */ - if (L_TOSTOP(tty) && file->f_op->write != redirected_tty_write) { + if (L_TOSTOP(tty) && file->f_op->write_iter != redirected_tty_write) { retval = tty_check_change(tty); if (retval) return retval; diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index fa876e2c13e5dc..f7d3023f860f06 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -572,15 +572,22 @@ static int mtk8250_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); err = mtk8250_runtime_resume(&pdev->dev); if (err) - return err; + goto err_pm_disable; data->line = serial8250_register_8250_port(&uart); - if (data->line < 0) - return data->line; + if (data->line < 0) { + err = data->line; + goto err_pm_disable; + } data->rx_wakeup_irq = platform_get_irq_optional(pdev, 1); return 0; + +err_pm_disable: + pm_runtime_disable(&pdev->dev); + + return err; } static int mtk8250_remove(struct platform_device *pdev) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 562087df7d334b..0cc6d35a081560 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -184,11 +184,6 @@ static void omap_8250_mdr1_errataset(struct uart_8250_port *up, struct omap8250_priv *priv) { u8 timeout = 255; - u8 old_mdr1; - - old_mdr1 = serial_in(up, UART_OMAP_MDR1); - if (old_mdr1 == priv->mdr1) - return; serial_out(up, UART_OMAP_MDR1, priv->mdr1); udelay(2); diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 118b2991228984..e0c00a1b07639b 100644 --- 
a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -648,6 +648,14 @@ static void wait_for_xmitr(struct uart_port *port) (val & STAT_TX_RDY(port)), 1, 10000); } +static void wait_for_xmite(struct uart_port *port) +{ + u32 val; + + readl_poll_timeout_atomic(port->membase + UART_STAT, val, + (val & STAT_TX_EMP), 1, 10000); +} + static void mvebu_uart_console_putchar(struct uart_port *port, int ch) { wait_for_xmitr(port); @@ -675,7 +683,7 @@ static void mvebu_uart_console_write(struct console *co, const char *s, uart_console_write(port, s, count, mvebu_uart_console_putchar); - wait_for_xmitr(port); + wait_for_xmite(port); if (ier) writel(ier, port->membase + UART_CTRL(port)); diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c index 063484b22523a5..d6aef8a1f0a48b 100644 --- a/drivers/tty/serial/pmac_zilog.c +++ b/drivers/tty/serial/pmac_zilog.c @@ -1693,22 +1693,26 @@ static int __init pmz_probe(void) #else +/* On PCI PowerMacs, pmz_probe() does an explicit search of the OpenFirmware + * tree to obtain the device_nodes needed to start the console before the + * macio driver. On Macs without OpenFirmware, global platform_devices take + * the place of those device_nodes. 
+ */ extern struct platform_device scc_a_pdev, scc_b_pdev; static int __init pmz_init_port(struct uart_pmac_port *uap) { - struct resource *r_ports; - int irq; + struct resource *r_ports, *r_irq; r_ports = platform_get_resource(uap->pdev, IORESOURCE_MEM, 0); - irq = platform_get_irq(uap->pdev, 0); - if (!r_ports || irq <= 0) + r_irq = platform_get_resource(uap->pdev, IORESOURCE_IRQ, 0); + if (!r_ports || !r_irq) return -ENODEV; uap->port.mapbase = r_ports->start; uap->port.membase = (unsigned char __iomem *) r_ports->start; uap->port.iotype = UPIO_MEM; - uap->port.irq = irq; + uap->port.irq = r_irq->start; uap->port.uartclk = ZS_CLOCK; uap->port.fifosize = 1; uap->port.ops = &pmz_pops; diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index f41cba10b86b9f..828f9ad1be49ce 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1467,6 +1467,10 @@ static void uart_set_ldisc(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; struct uart_port *uport; + struct tty_port *port = &state->port; + + if (!tty_port_initialized(port)) + return; mutex_lock(&state->port.mutex); uport = uart_port_check(state); diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 13eadcb8aec4e1..214bf3086c68a2 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -999,6 +999,7 @@ static int sifive_serial_probe(struct platform_device *pdev) /* Set up clock divider */ ssp->clkin_rate = clk_get_rate(ssp->clk); ssp->baud_rate = SIFIVE_DEFAULT_BAUD_RATE; + ssp->port.uartclk = ssp->baud_rate * 16; __ssp_update_div(ssp); platform_set_drvdata(pdev, ssp); diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 56ade99ef99f41..ff87cb51747d81 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -143,12 +143,9 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ DEFINE_MUTEX(tty_mutex); static ssize_t tty_read(struct file *, char __user *, size_t, loff_t 
*); -static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); -ssize_t redirected_tty_write(struct file *, const char __user *, - size_t, loff_t *); +static ssize_t tty_write(struct kiocb *, struct iov_iter *); static __poll_t tty_poll(struct file *, poll_table *); static int tty_open(struct inode *, struct file *); -long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); @@ -438,8 +435,7 @@ static ssize_t hung_up_tty_read(struct file *file, char __user *buf, return 0; } -static ssize_t hung_up_tty_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t hung_up_tty_write(struct kiocb *iocb, struct iov_iter *from) { return -EIO; } @@ -478,7 +474,8 @@ static void tty_show_fdinfo(struct seq_file *m, struct file *file) static const struct file_operations tty_fops = { .llseek = no_llseek, .read = tty_read, - .write = tty_write, + .write_iter = tty_write, + .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, @@ -491,7 +488,8 @@ static const struct file_operations tty_fops = { static const struct file_operations console_fops = { .llseek = no_llseek, .read = tty_read, - .write = redirected_tty_write, + .write_iter = redirected_tty_write, + .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, @@ -503,7 +501,7 @@ static const struct file_operations console_fops = { static const struct file_operations hung_up_tty_fops = { .llseek = no_llseek, .read = hung_up_tty_read, - .write = hung_up_tty_write, + .write_iter = hung_up_tty_write, .poll = hung_up_tty_poll, .unlocked_ioctl = hung_up_tty_ioctl, .compat_ioctl = hung_up_tty_compat_ioctl, @@ -607,9 +605,9 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) /* This breaks for file handles being 
sent over AF_UNIX sockets ? */ list_for_each_entry(priv, &tty->tty_files, list) { filp = priv->file; - if (filp->f_op->write == redirected_tty_write) + if (filp->f_op->write_iter == redirected_tty_write) cons_filp = filp; - if (filp->f_op->write != tty_write) + if (filp->f_op->write_iter != tty_write) continue; closecount++; __tty_fasync(-1, filp, 0); /* can't block */ @@ -902,9 +900,9 @@ static inline ssize_t do_tty_write( ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t), struct tty_struct *tty, struct file *file, - const char __user *buf, - size_t count) + struct iov_iter *from) { + size_t count = iov_iter_count(from); ssize_t ret, written = 0; unsigned int chunk; @@ -956,14 +954,20 @@ static inline ssize_t do_tty_write( size_t size = count; if (size > chunk) size = chunk; + ret = -EFAULT; - if (copy_from_user(tty->write_buf, buf, size)) + if (copy_from_iter(tty->write_buf, size, from) != size) break; + ret = write(tty, file, tty->write_buf, size); if (ret <= 0) break; + + /* FIXME! Have Al check this! */ + if (ret != size) + iov_iter_revert(from, size-ret); + written += ret; - buf += ret; count -= ret; if (!count) break; @@ -1023,8 +1027,7 @@ void tty_write_message(struct tty_struct *tty, char *msg) * write method will not be invoked in parallel for each device. 
*/ -static ssize_t tty_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t file_tty_write(struct file *file, struct kiocb *iocb, struct iov_iter *from) { struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; @@ -1039,17 +1042,21 @@ static ssize_t tty_write(struct file *file, const char __user *buf, tty_err(tty, "missing write_room method\n"); ld = tty_ldisc_ref_wait(tty); if (!ld) - return hung_up_tty_write(file, buf, count, ppos); + return hung_up_tty_write(iocb, from); if (!ld->ops->write) ret = -EIO; else - ret = do_tty_write(ld->ops->write, tty, file, buf, count); + ret = do_tty_write(ld->ops->write, tty, file, from); tty_ldisc_deref(ld); return ret; } -ssize_t redirected_tty_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) +{ + return file_tty_write(iocb->ki_filp, iocb, from); +} + +ssize_t redirected_tty_write(struct kiocb *iocb, struct iov_iter *iter) { struct file *p = NULL; @@ -1058,13 +1065,17 @@ ssize_t redirected_tty_write(struct file *file, const char __user *buf, p = get_file(redirect); spin_unlock(&redirect_lock); + /* + * We know the redirected tty is just another tty, we can can + * call file_tty_write() directly with that file pointer. 
+ */ if (p) { ssize_t res; - res = vfs_write(p, buf, count, &p->f_pos); + res = file_tty_write(p, iocb, iter); fput(p); return res; } - return tty_write(file, buf, count, ppos); + return tty_write(iocb, iter); } /** @@ -2293,7 +2304,7 @@ static int tioccons(struct file *file) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (file->f_op->write == redirected_tty_write) { + if (file->f_op->write_iter == redirected_tty_write) { struct file *f; spin_lock(&redirect_lock); f = redirect; @@ -2303,6 +2314,12 @@ static int tioccons(struct file *file) fput(f); return 0; } + if (file->f_op->write_iter != tty_write) + return -ENOTTY; + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + if (!(file->f_mode & FMODE_CAN_WRITE)) + return -EINVAL; spin_lock(&redirect_lock); if (redirect) { spin_unlock(&redirect_lock); diff --git a/drivers/usb/cdns3/cdns3-imx.c b/drivers/usb/cdns3/cdns3-imx.c index 54a2d70a9c7305..7e728aab647550 100644 --- a/drivers/usb/cdns3/cdns3-imx.c +++ b/drivers/usb/cdns3/cdns3-imx.c @@ -184,7 +184,11 @@ static int cdns_imx_probe(struct platform_device *pdev) } data->num_clks = ARRAY_SIZE(imx_cdns3_core_clks); - data->clks = (struct clk_bulk_data *)imx_cdns3_core_clks; + data->clks = devm_kmemdup(dev, imx_cdns3_core_clks, + sizeof(imx_cdns3_core_clks), GFP_KERNEL); + if (!data->clks) + return -ENOMEM; + ret = devm_clk_bulk_get(dev, data->num_clks, data->clks); if (ret) return ret; @@ -214,20 +218,11 @@ static int cdns_imx_probe(struct platform_device *pdev) return ret; } -static int cdns_imx_remove_core(struct device *dev, void *data) -{ - struct platform_device *pdev = to_platform_device(dev); - - platform_device_unregister(pdev); - - return 0; -} - static int cdns_imx_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - device_for_each_child(dev, NULL, cdns_imx_remove_core); + of_platform_depopulate(dev); platform_set_drvdata(pdev, NULL); return 0; diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c 
index 25c65accf089c1..ee565bdb44d651 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -57,7 +57,8 @@ static const struct ci_hdrc_imx_platform_flag imx6sx_usb_data = { static const struct ci_hdrc_imx_platform_flag imx6ul_usb_data = { .flags = CI_HDRC_SUPPORTS_RUNTIME_PM | - CI_HDRC_TURN_VBUS_EARLY_ON, + CI_HDRC_TURN_VBUS_EARLY_ON | + CI_HDRC_DISABLE_DEVICE_STREAMING, }; static const struct ci_hdrc_imx_platform_flag imx7d_usb_data = { @@ -138,9 +139,13 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev) misc_pdev = of_find_device_by_node(args.np); of_node_put(args.np); - if (!misc_pdev || !platform_get_drvdata(misc_pdev)) + if (!misc_pdev) return ERR_PTR(-EPROBE_DEFER); + if (!platform_get_drvdata(misc_pdev)) { + put_device(&misc_pdev->dev); + return ERR_PTR(-EPROBE_DEFER); + } data->dev = &misc_pdev->dev; /* diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index f52f1bc0559f9d..781905745812ea 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1895,6 +1895,10 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x04d8, 0xfd08), .driver_info = IGNORE_DEVICE, }, + + { USB_DEVICE(0x04d8, 0xf58b), + .driver_info = IGNORE_DEVICE, + }, #endif /*Samsung phone in firmware update mode */ diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 02d0cfd23bb297..508b1c3f8b731b 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -465,13 +465,23 @@ static int service_outstanding_interrupt(struct wdm_device *desc) if (!desc->resp_count || !--desc->resp_count) goto out; + if (test_bit(WDM_DISCONNECTING, &desc->flags)) { + rv = -ENODEV; + goto out; + } + if (test_bit(WDM_RESETTING, &desc->flags)) { + rv = -EIO; + goto out; + } + set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); if (rv) { - 
dev_err(&desc->intf->dev, - "usb_submit_urb failed with result %d\n", rv); + if (!test_bit(WDM_DISCONNECTING, &desc->flags)) + dev_err(&desc->intf->dev, + "usb_submit_urb failed with result %d\n", rv); /* make sure the next notification trigger a submit */ clear_bit(WDM_RESPONDING, &desc->flags); @@ -1027,9 +1037,9 @@ static void wdm_disconnect(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); - kill_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); + kill_urbs(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 67cbd42421bee7..134dc2005ce97d 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -274,8 +274,25 @@ static int usblp_ctrl_msg(struct usblp *usblp, int request, int type, int dir, i #define usblp_reset(usblp)\ usblp_ctrl_msg(usblp, USBLP_REQ_RESET, USB_TYPE_CLASS, USB_DIR_OUT, USB_RECIP_OTHER, 0, NULL, 0) -#define usblp_hp_channel_change_request(usblp, channel, buffer) \ - usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, channel, buffer, 1) +static int usblp_hp_channel_change_request(struct usblp *usblp, int channel, u8 *new_channel) +{ + u8 *buf; + int ret; + + buf = kzalloc(1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, + USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, + channel, buf, 1); + if (ret == 0) + *new_channel = buf[0]; + + kfree(buf); + + return ret; +} /* * See the description for usblp_select_alts() below for the usage diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index fad31ccd1fa838..1b4eb7046b0782 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -342,6 +342,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x06a3, 0x0006), .driver_info = 
USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Agfa SNAPSCAN 1212U */ + { USB_DEVICE(0x06bd, 0x0001), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Guillemot Webcam Hercules Dualpix Exchange (2nd ID) */ { USB_DEVICE(0x06f8, 0x0804), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 2f95f08ca51190..1b241f937d8f42 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -285,6 +285,7 @@ /* Global USB2 PHY Vendor Control Register */ #define DWC3_GUSB2PHYACC_NEWREGREQ BIT(25) +#define DWC3_GUSB2PHYACC_DONE BIT(24) #define DWC3_GUSB2PHYACC_BUSY BIT(23) #define DWC3_GUSB2PHYACC_WRITE BIT(22) #define DWC3_GUSB2PHYACC_ADDR(n) (n << 16) diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c index 417e05381b5d0f..bdf1f98dfad8c8 100644 --- a/drivers/usb/dwc3/dwc3-meson-g12a.c +++ b/drivers/usb/dwc3/dwc3-meson-g12a.c @@ -754,7 +754,7 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev) ret = priv->drvdata->setup_regmaps(priv, base); if (ret) - return ret; + goto err_disable_clks; if (priv->vbus) { ret = regulator_enable(priv->vbus); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 78cb4db8a6e45d..ee44321fee3861 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1763,6 +1763,8 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, list_for_each_entry_safe(r, t, &dep->started_list, list) dwc3_gadget_move_cancelled_request(r); + dep->flags &= ~DWC3_EP_WAIT_TRANSFER_COMPLETE; + goto out; } } @@ -2083,6 +2085,7 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend) static void dwc3_gadget_disable_irq(struct dwc3 *dwc); static void __dwc3_gadget_stop(struct dwc3 *dwc); +static int __dwc3_gadget_start(struct dwc3 *dwc); static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) { @@ -2145,6 +2148,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) dwc->ev_buf->lpos = (dwc->ev_buf->lpos + 
count) % dwc->ev_buf->length; } + } else { + __dwc3_gadget_start(dwc); } ret = dwc3_gadget_run_stop(dwc, is_on, false); @@ -2319,10 +2324,6 @@ static int dwc3_gadget_start(struct usb_gadget *g, } dwc->gadget_driver = driver; - - if (pm_runtime_active(dwc->dev)) - __dwc3_gadget_start(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); return 0; @@ -2348,13 +2349,6 @@ static int dwc3_gadget_stop(struct usb_gadget *g) unsigned long flags; spin_lock_irqsave(&dwc->lock, flags); - - if (pm_runtime_suspended(dwc->dev)) - goto out; - - __dwc3_gadget_stop(dwc); - -out: dwc->gadget_driver = NULL; spin_unlock_irqrestore(&dwc->lock, flags); diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index aa213c9815f67b..f23f4c9a557e9a 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -7,6 +7,8 @@ * Author: Heikki Krogerus */ +#include +#include #include #include "core.h" @@ -17,14 +19,28 @@ DWC3_GUSB2PHYACC_ADDR(ULPI_ACCESS_EXTENDED) | \ DWC3_GUSB2PHYACC_EXTEND_ADDR(a) : DWC3_GUSB2PHYACC_ADDR(a)) -static int dwc3_ulpi_busyloop(struct dwc3 *dwc) +#define DWC3_ULPI_BASE_DELAY DIV_ROUND_UP(NSEC_PER_SEC, 60000000L) + +static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) { - unsigned int count = 1000; + unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY; + unsigned int count = 10000; u32 reg; + if (addr >= ULPI_EXT_VENDOR_SPECIFIC) + ns += DWC3_ULPI_BASE_DELAY; + + if (read) + ns += DWC3_ULPI_BASE_DELAY; + + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (reg & DWC3_GUSB2PHYCFG_SUSPHY) + usleep_range(1000, 1200); + while (count--) { + ndelay(ns); reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); - if (!(reg & DWC3_GUSB2PHYACC_BUSY)) + if (reg & DWC3_GUSB2PHYACC_DONE) return 0; cpu_relax(); } @@ -38,16 +54,10 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr) u32 reg; int ret; - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, 
DWC3_GUSB2PHYCFG(0), reg); - } - reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); - ret = dwc3_ulpi_busyloop(dwc); + ret = dwc3_ulpi_busyloop(dwc, addr, true); if (ret) return ret; @@ -61,17 +71,11 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val) struct dwc3 *dwc = dev_get_drvdata(dev); u32 reg; - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); - } - reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); reg |= DWC3_GUSB2PHYACC_WRITE | val; dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); - return dwc3_ulpi_busyloop(dwc); + return dwc3_ulpi_busyloop(dwc, addr, false); } static const struct ulpi_ops dwc3_ulpi_ops = { diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 7e47e6223089cb..2d152571a7de81 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -265,6 +265,7 @@ config USB_CONFIGFS_NCM depends on NET select USB_U_ETHER select USB_F_NCM + select CRC32 help NCM is an advanced protocol for Ethernet encapsulation, allows grouping of several ethernet frames into one USB transfer and @@ -314,6 +315,7 @@ config USB_CONFIGFS_EEM depends on NET select USB_U_ETHER select USB_F_EEM + select CRC32 help CDC EEM is a newer USB standard that is somewhat simpler than CDC ECM and therefore can be supported by more hardware. 
Technically ECM and diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index c6d455f2bb928b..1a556a628971f6 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -392,8 +392,11 @@ int usb_function_deactivate(struct usb_function *function) spin_lock_irqsave(&cdev->lock, flags); - if (cdev->deactivations == 0) + if (cdev->deactivations == 0) { + spin_unlock_irqrestore(&cdev->lock, flags); status = usb_gadget_deactivate(cdev->gadget); + spin_lock_irqsave(&cdev->lock, flags); + } if (status == 0) cdev->deactivations++; @@ -424,8 +427,11 @@ int usb_function_activate(struct usb_function *function) status = -EINVAL; else { cdev->deactivations--; - if (cdev->deactivations == 0) + if (cdev->deactivations == 0) { + spin_unlock_irqrestore(&cdev->lock, flags); status = usb_gadget_activate(cdev->gadget); + spin_lock_irqsave(&cdev->lock, flags); + } } spin_unlock_irqrestore(&cdev->lock, flags); diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 56051bb9734983..36ffb43f9c1a05 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -221,9 +221,16 @@ static ssize_t gadget_dev_desc_bcdUSB_store(struct config_item *item, static ssize_t gadget_dev_desc_UDC_show(struct config_item *item, char *page) { - char *udc_name = to_gadget_info(item)->composite.gadget_driver.udc_name; + struct gadget_info *gi = to_gadget_info(item); + char *udc_name; + int ret; + + mutex_lock(&gi->lock); + udc_name = gi->composite.gadget_driver.udc_name; + ret = sprintf(page, "%s\n", udc_name ?: ""); + mutex_unlock(&gi->lock); - return sprintf(page, "%s\n", udc_name ?: ""); + return ret; } static int unregister_gadget(struct gadget_info *gi) @@ -1248,9 +1255,9 @@ static void purge_configs_funcs(struct gadget_info *gi) cfg = container_of(c, struct config_usb_cfg, c); - list_for_each_entry_safe(f, tmp, &c->functions, list) { + list_for_each_entry_safe_reverse(f, tmp, &c->functions, list) { - 
list_move_tail(&f->list, &cfg->func_list); + list_move(&f->list, &cfg->func_list); if (f->unbind) { dev_dbg(&gi->cdev.gadget->dev, "unbind function '%s'/%p\n", @@ -1536,7 +1543,7 @@ static const struct usb_gadget_driver configfs_driver_template = { .suspend = configfs_composite_suspend, .resume = configfs_composite_resume, - .max_speed = USB_SPEED_SUPER, + .max_speed = USB_SPEED_SUPER_PLUS, .driver = { .owner = THIS_MODULE, .name = "configfs-gadget", @@ -1576,7 +1583,7 @@ static struct config_group *gadgets_make( gi->composite.unbind = configfs_do_nothing; gi->composite.suspend = NULL; gi->composite.resume = NULL; - gi->composite.max_speed = USB_SPEED_SUPER; + gi->composite.max_speed = USB_SPEED_SUPER_PLUS; spin_lock_init(&gi->spinlock); mutex_init(&gi->lock); diff --git a/drivers/usb/gadget/function/f_acm.c b/drivers/usb/gadget/function/f_acm.c index 46647bfac2ef8d..349945e064bba3 100644 --- a/drivers/usb/gadget/function/f_acm.c +++ b/drivers/usb/gadget/function/f_acm.c @@ -686,7 +686,7 @@ acm_bind(struct usb_configuration *c, struct usb_function *f) acm_ss_out_desc.bEndpointAddress = acm_fs_out_desc.bEndpointAddress; status = usb_assign_descriptors(f, acm_fs_function, acm_hs_function, - acm_ss_function, NULL); + acm_ss_function, acm_ss_function); if (status) goto fail; diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index c727cb5de87183..f3443347874d29 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1328,6 +1328,7 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code, switch (epfile->ffs->gadget->speed) { case USB_SPEED_SUPER: + case USB_SPEED_SUPER_PLUS: desc_idx = 2; break; case USB_SPEED_HIGH: @@ -3174,7 +3175,8 @@ static int _ffs_func_bind(struct usb_configuration *c, } if (likely(super)) { - func->function.ss_descriptors = vla_ptr(vlabuf, d, ss_descs); + func->function.ss_descriptors = func->function.ssp_descriptors = + vla_ptr(vlabuf, d, ss_descs); ss_len = 
ffs_do_descs(ffs->ss_descs_count, vla_ptr(vlabuf, d, raw_descs) + fs_len + hs_len, d_raw_descs__sz - fs_len - hs_len, @@ -3584,6 +3586,7 @@ static void ffs_func_unbind(struct usb_configuration *c, func->function.fs_descriptors = NULL; func->function.hs_descriptors = NULL; func->function.ss_descriptors = NULL; + func->function.ssp_descriptors = NULL; func->interfaces_nums = NULL; ffs_event_add(ffs, FUNCTIONFS_UNBIND); diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index 19d97940eeb933..8fff995b8dd501 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -1048,6 +1048,12 @@ static int f_midi_bind(struct usb_configuration *c, struct usb_function *f) f->ss_descriptors = usb_copy_descriptors(midi_function); if (!f->ss_descriptors) goto fail_f_midi; + + if (gadget_is_superspeed_plus(c->cdev->gadget)) { + f->ssp_descriptors = usb_copy_descriptors(midi_function); + if (!f->ssp_descriptors) + goto fail_f_midi; + } } kfree(midi_function); diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index 64a4112068fc8b..2f1eb2e81d306b 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -1162,6 +1162,7 @@ static int printer_func_bind(struct usb_configuration *c, printer_req_free(dev->in_ep, req); } + usb_free_all_descriptors(f); return ret; } diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index 9534c8ab62a8e4..0739b05a0ef7b3 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -87,8 +87,10 @@ static inline struct f_rndis *func_to_rndis(struct usb_function *f) /* peak (theoretical) bulk transfer rate in bits-per-second */ static unsigned int bitrate(struct usb_gadget *g) { + if (gadget_is_superspeed(g) && g->speed >= USB_SPEED_SUPER_PLUS) + return 4250000000U; if (gadget_is_superspeed(g) && g->speed == USB_SPEED_SUPER) - 
return 13 * 1024 * 8 * 1000 * 8; + return 3750000000U; else if (gadget_is_dualspeed(g) && g->speed == USB_SPEED_HIGH) return 13 * 512 * 8 * 1000 * 8; else diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 3633df6d7610f0..5d960b6603b6f0 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -271,7 +271,7 @@ static struct usb_endpoint_descriptor fs_epout_desc = { .bEndpointAddress = USB_DIR_OUT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1023), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 1, }; @@ -280,7 +280,7 @@ static struct usb_endpoint_descriptor hs_epout_desc = { .bDescriptorType = USB_DT_ENDPOINT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1024), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 4, }; @@ -348,7 +348,7 @@ static struct usb_endpoint_descriptor fs_epin_desc = { .bEndpointAddress = USB_DIR_IN, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1023), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 1, }; @@ -357,7 +357,7 @@ static struct usb_endpoint_descriptor hs_epin_desc = { .bDescriptorType = USB_DT_ENDPOINT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1024), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 4, }; @@ -444,12 +444,28 @@ struct cntrl_range_lay3 { __le32 dRES; } __packed; -static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, +static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, struct usb_endpoint_descriptor *ep_desc, - unsigned int factor, bool is_playback) + enum usb_device_speed speed, bool is_playback) { int chmask, srate, ssize; - u16 max_packet_size; + u16 max_size_bw, max_size_ep; + unsigned int factor; + + switch (speed) { + case USB_SPEED_FULL: + max_size_ep = 1023; + factor = 1000; + break; + + 
case USB_SPEED_HIGH: + max_size_ep = 1024; + factor = 8000; + break; + + default: + return -EINVAL; + } if (is_playback) { chmask = uac2_opts->p_chmask; @@ -461,10 +477,12 @@ static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, ssize = uac2_opts->c_ssize; } - max_packet_size = num_channels(chmask) * ssize * + max_size_bw = num_channels(chmask) * ssize * DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))); - ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_packet_size, - le16_to_cpu(ep_desc->wMaxPacketSize))); + ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw, + max_size_ep)); + + return 0; } /* Use macro to overcome line length limitation */ @@ -670,10 +688,33 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) } /* Calculate wMaxPacketSize according to audio bandwidth */ - set_ep_max_packet_size(uac2_opts, &fs_epin_desc, 1000, true); - set_ep_max_packet_size(uac2_opts, &fs_epout_desc, 1000, false); - set_ep_max_packet_size(uac2_opts, &hs_epin_desc, 8000, true); - set_ep_max_packet_size(uac2_opts, &hs_epout_desc, 8000, false); + ret = set_ep_max_packet_size(uac2_opts, &fs_epin_desc, USB_SPEED_FULL, + true); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &fs_epout_desc, USB_SPEED_FULL, + false); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &hs_epin_desc, USB_SPEED_HIGH, + true); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &hs_epout_desc, USB_SPEED_HIGH, + false); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } if (EPOUT_EN(uac2_opts)) { agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc); diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 
31ea76adcc0db3..c019f2b0c0af3d 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -45,9 +45,10 @@ #define UETH__VERSION "29-May-2008" /* Experiments show that both Linux and Windows hosts allow up to 16k - * frame sizes. Set the max size to 15k+52 to prevent allocating 32k + * frame sizes. Set the max MTU size to 15k+52 to prevent allocating 32k * blocks and still have efficient handling. */ -#define GETHER_MAX_ETH_FRAME_LEN 15412 +#define GETHER_MAX_MTU_SIZE 15412 +#define GETHER_MAX_ETH_FRAME_LEN (GETHER_MAX_MTU_SIZE + ETH_HLEN) struct eth_dev { /* lock is held while accessing port_usb @@ -786,7 +787,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, /* MTU range: 14 - 15412 */ net->min_mtu = ETH_HLEN; - net->max_mtu = GETHER_MAX_ETH_FRAME_LEN; + net->max_mtu = GETHER_MAX_MTU_SIZE; dev->gadget = g; SET_NETDEV_DEV(net, &g->dev); @@ -848,7 +849,7 @@ struct net_device *gether_setup_name_default(const char *netname) /* MTU range: 14 - 15412 */ net->min_mtu = ETH_HLEN; - net->max_mtu = GETHER_MAX_ETH_FRAME_LEN; + net->max_mtu = GETHER_MAX_MTU_SIZE; return net; } diff --git a/drivers/usb/gadget/legacy/acm_ms.c b/drivers/usb/gadget/legacy/acm_ms.c index 59be2d8417c9ce..e8033e5f0c18e4 100644 --- a/drivers/usb/gadget/legacy/acm_ms.c +++ b/drivers/usb/gadget/legacy/acm_ms.c @@ -200,8 +200,10 @@ static int acm_ms_bind(struct usb_composite_dev *cdev) struct usb_descriptor_header *usb_desc; usb_desc = usb_otg_descriptor_alloc(gadget); - if (!usb_desc) + if (!usb_desc) { + status = -ENOMEM; goto fail_string_ids; + } usb_otg_descriptor_init(gadget, usb_desc); otg_desc[0] = usb_desc; otg_desc[1] = NULL; diff --git a/drivers/usb/gadget/udc/aspeed-vhub/epn.c b/drivers/usb/gadget/udc/aspeed-vhub/epn.c index 0bd6b20435b8a5..02d8bfae58fb14 100644 --- a/drivers/usb/gadget/udc/aspeed-vhub/epn.c +++ b/drivers/usb/gadget/udc/aspeed-vhub/epn.c @@ -420,7 +420,10 @@ static void ast_vhub_stop_active_req(struct ast_vhub_ep *ep, u32 
state, reg, loops; /* Stop DMA activity */ - writel(0, ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT); + if (ep->epn.desc_mode) + writel(VHUB_EP_DMA_CTRL_RESET, ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT); + else + writel(0, ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT); /* Wait for it to complete */ for (loops = 0; loops < 1000; loops++) { diff --git a/drivers/usb/gadget/udc/bdc/Kconfig b/drivers/usb/gadget/udc/bdc/Kconfig index 3e88c7670b2ed4..fb01ff47b64cf6 100644 --- a/drivers/usb/gadget/udc/bdc/Kconfig +++ b/drivers/usb/gadget/udc/bdc/Kconfig @@ -17,7 +17,7 @@ if USB_BDC_UDC comment "Platform Support" config USB_BDC_PCI tristate "BDC support for PCIe based platforms" - depends on USB_PCI + depends on USB_PCI && BROKEN default USB_BDC_UDC help Enable support for platforms which have BDC connected through PCIe, such as Lego3 FPGA platform. diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index debf54205d22e3..da691a69fec10c 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1532,10 +1532,13 @@ static ssize_t soft_connect_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { struct usb_udc *udc = container_of(dev, struct usb_udc, dev); + ssize_t ret; + mutex_lock(&udc_lock); if (!udc->driver) { dev_err(dev, "soft-connect without a gadget driver\n"); - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + goto out; } if (sysfs_streq(buf, "connect")) { @@ -1546,10 +1549,14 @@ static ssize_t soft_connect_store(struct device *dev, usb_gadget_udc_stop(udc); } else { dev_err(dev, "unsupported command '%s'\n", buf); - return -EINVAL; + ret = -EINVAL; + goto out; } - return n; + ret = n; +out: + mutex_unlock(&udc_lock); + return ret; } static DEVICE_ATTR_WO(soft_connect); diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 53a227217f1cbc..17704ee2d7f54e 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -2114,9 +2114,21 @@ static 
int dummy_hub_control( dum_hcd->port_status &= ~USB_PORT_STAT_POWER; set_link_state(dum_hcd); break; - default: + case USB_PORT_FEAT_ENABLE: + case USB_PORT_FEAT_C_ENABLE: + case USB_PORT_FEAT_C_SUSPEND: + /* Not allowed for USB-3 */ + if (hcd->speed == HCD_USB3) + goto error; + fallthrough; + case USB_PORT_FEAT_C_CONNECTION: + case USB_PORT_FEAT_C_RESET: dum_hcd->port_status &= ~(1 << wValue); set_link_state(dum_hcd); + break; + default: + /* Disallow INDICATOR and C_OVER_CURRENT */ + goto error; } break; case GetHubDescriptor: @@ -2254,17 +2266,20 @@ static int dummy_hub_control( } fallthrough; case USB_PORT_FEAT_RESET: + if (!(dum_hcd->port_status & USB_PORT_STAT_CONNECTION)) + break; /* if it's already enabled, disable */ if (hcd->speed == HCD_USB3) { - dum_hcd->port_status = 0; dum_hcd->port_status = (USB_SS_PORT_STAT_POWER | USB_PORT_STAT_CONNECTION | USB_PORT_STAT_RESET); - } else + } else { dum_hcd->port_status &= ~(USB_PORT_STAT_ENABLE | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED); + dum_hcd->port_status |= USB_PORT_STAT_RESET; + } /* * We want to reset device status. All but the * Self powered feature @@ -2276,19 +2291,19 @@ static int dummy_hub_control( * interval? Is it still 50msec as for HS? 
*/ dum_hcd->re_timeout = jiffies + msecs_to_jiffies(50); - fallthrough; - default: - if (hcd->speed == HCD_USB3) { - if ((dum_hcd->port_status & - USB_SS_PORT_STAT_POWER) != 0) { - dum_hcd->port_status |= (1 << wValue); - } - } else - if ((dum_hcd->port_status & - USB_PORT_STAT_POWER) != 0) { - dum_hcd->port_status |= (1 << wValue); - } set_link_state(dum_hcd); + break; + case USB_PORT_FEAT_C_CONNECTION: + case USB_PORT_FEAT_C_RESET: + case USB_PORT_FEAT_C_ENABLE: + case USB_PORT_FEAT_C_SUSPEND: + /* Not allowed for USB-3, and ignored for USB-2 */ + if (hcd->speed == HCD_USB3) + goto error; + break; + default: + /* Disallow TEST, INDICATOR, and C_OVER_CURRENT */ + goto error; } break; case GetPortErrorCount: @@ -2734,7 +2749,7 @@ static int __init init(void) { int retval = -ENOMEM; int i; - struct dummy *dum[MAX_NUM_UDC]; + struct dummy *dum[MAX_NUM_UDC] = {}; if (usb_disabled()) return -ENODEV; diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 3575b720188104..b5db2b2d0901a3 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -574,6 +574,7 @@ static int ehci_run (struct usb_hcd *hcd) struct ehci_hcd *ehci = hcd_to_ehci (hcd); u32 temp; u32 hcc_params; + int rc; hcd->uses_new_polling = 1; @@ -629,9 +630,20 @@ static int ehci_run (struct usb_hcd *hcd) down_write(&ehci_cf_port_reset_rwsem); ehci->rh_state = EHCI_RH_RUNNING; ehci_writel(ehci, FLAG_CF, &ehci->regs->configured_flag); + + /* Wait until HC become operational */ ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */ msleep(5); + rc = ehci_handshake(ehci, &ehci->regs->status, STS_HALT, 0, 100 * 1000); + up_write(&ehci_cf_port_reset_rwsem); + + if (rc) { + ehci_err(ehci, "USB %x.%x, controller refused to start: %d\n", + ((ehci->sbrn & 0xf0)>>4), (ehci->sbrn & 0x0f), rc); + return rc; + } + ehci->last_periodic_enable = ktime_get_real(); temp = HC_VERSION(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); diff --git a/drivers/usb/host/ehci-hub.c 
b/drivers/usb/host/ehci-hub.c index 087402aec5cbeb..9f9ab5ccea889d 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -345,6 +345,9 @@ static int ehci_bus_suspend (struct usb_hcd *hcd) unlink_empty_async_suspended(ehci); + /* Some Synopsys controllers mistakenly leave IAA turned on */ + ehci_writel(ehci, STS_IAA, &ehci->regs->status); + /* Any IAA cycle that started before the suspend is now invalid */ end_iaa_cycle(ehci); ehci_handle_start_intr_unlinks(ehci); diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c index 8771a2ed69268f..7f4a03e8647af6 100644 --- a/drivers/usb/host/ehci-omap.c +++ b/drivers/usb/host/ehci-omap.c @@ -220,6 +220,7 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev) err_pm_runtime: pm_runtime_put_sync(dev); + pm_runtime_disable(dev); err_phy: for (i = 0; i < omap->nports; i++) { diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index 0894f6caccb2cd..ebb8180b52ab17 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -1847,7 +1847,7 @@ max3421_probe(struct spi_device *spi) struct max3421_hcd *max3421_hcd; struct usb_hcd *hcd = NULL; struct max3421_hcd_platform_data *pdata = NULL; - int retval = -ENOMEM; + int retval; if (spi_setup(spi) < 0) { dev_err(&spi->dev, "Unable to setup SPI bus"); @@ -1889,6 +1889,7 @@ max3421_probe(struct spi_device *spi) goto error; } + retval = -ENOMEM; hcd = usb_create_hcd(&max3421_hcd_desc, &spi->dev, dev_name(&spi->dev)); if (!hcd) { diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c index 27dbbe1b28b12c..e832909a924fa6 100644 --- a/drivers/usb/host/oxu210hp-hcd.c +++ b/drivers/usb/host/oxu210hp-hcd.c @@ -4151,8 +4151,10 @@ static struct usb_hcd *oxu_create(struct platform_device *pdev, oxu->is_otg = otg; ret = usb_add_hcd(hcd, irq, IRQF_SHARED); - if (ret < 0) + if (ret < 0) { + usb_put_hcd(hcd); return ERR_PTR(ret); + } device_wakeup_enable(hcd->self.controller); 
return hcd; diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index c799ca5361d4d9..74c497fd347620 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1712,6 +1712,10 @@ int xhci_bus_suspend(struct usb_hcd *hcd) hcd->state = HC_STATE_SUSPENDED; bus_state->next_statechange = jiffies + msecs_to_jiffies(10); spin_unlock_irqrestore(&xhci->lock, flags); + + if (bus_state->bus_suspended) + usleep_range(5000, 10000); + return 0; } diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index bf89172c43cace..84da8406d5b424 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -47,6 +47,7 @@ #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_XHCI 0x15b5 #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_XHCI 0x15b6 +#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_LP_XHCI 0x15c1 #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_2C_XHCI 0x15db #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_4C_XHCI 0x15d4 #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_2C_XHCI 0x15e9 @@ -55,6 +56,7 @@ #define PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI 0x8a13 #define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13 +#define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba @@ -232,13 +234,15 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_LP_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_2C_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_4C_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_2C_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI || 
pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index aa2d35f982002e..4d34f6005381e6 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -333,6 +333,9 @@ static int xhci_plat_probe(struct platform_device *pdev) if (priv && (priv->quirks & XHCI_SKIP_PHY_INIT)) hcd->skip_phy_initialization = 1; + if (priv && (priv->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK)) + xhci->quirks |= XHCI_SG_TRB_CACHE_SIZE_QUIRK; + ret = usb_add_hcd(hcd, irq, IRQF_SHARED); if (ret) goto disable_usb_phy; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 167dae117f7387..db8612ec82d3ee 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2930,6 +2930,8 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring, trb->field[0] = cpu_to_le32(field1); trb->field[1] = cpu_to_le32(field2); trb->field[2] = cpu_to_le32(field3); + /* make sure TRB is fully written before giving it to the controller */ + wmb(); trb->field[3] = cpu_to_le32(field4); trace_xhci_queue_trb(ring, trb); diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 934be168635230..50bb91b6a4b8d8 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -623,6 +623,13 @@ static void tegra_xusb_mbox_handle(struct tegra_xusb *tegra, enable); if (err < 0) break; + + /* + * wait 500us for LFPS detector to be disabled before + * sending ACK + */ + if (!enable) + usleep_range(500, 1000); } if (err < 0) { diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index d4a8d0efbbc4d6..73f1373d517a24 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ 
-4646,19 +4646,19 @@ static u16 xhci_calculate_u1_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + if (xhci->quirks & XHCI_INTEL_HOST) + timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); + else + timeout_ns = udev->u1_params.sel; + /* Prevent U1 if service interval is shorter than U1 exit latency */ if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { - if (xhci_service_interval_to_ns(desc) <= udev->u1_params.mel) { + if (xhci_service_interval_to_ns(desc) <= timeout_ns) { dev_dbg(&udev->dev, "Disable U1, ESIT shorter than exit latency\n"); return USB3_LPM_DISABLED; } } - if (xhci->quirks & XHCI_INTEL_HOST) - timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); - else - timeout_ns = udev->u1_params.sel; - /* The U1 timeout is encoded in 1us intervals. * Don't return a timeout of zero, because that's USB3_LPM_DISABLED. */ @@ -4710,19 +4710,19 @@ static u16 xhci_calculate_u2_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + if (xhci->quirks & XHCI_INTEL_HOST) + timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); + else + timeout_ns = udev->u2_params.sel; + /* Prevent U2 if service interval is shorter than U2 exit latency */ if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { - if (xhci_service_interval_to_ns(desc) <= udev->u2_params.mel) { + if (xhci_service_interval_to_ns(desc) <= timeout_ns) { dev_dbg(&udev->dev, "Disable U2, ESIT shorter than exit latency\n"); return USB3_LPM_DISABLED; } } - if (xhci->quirks & XHCI_INTEL_HOST) - timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); - else - timeout_ns = udev->u2_params.sel; - /* The U2 timeout is encoded in 256us intervals */ timeout_ns = DIV_ROUND_UP_ULL(timeout_ns, 256 * 1000); /* If the necessary timeout value is bigger than what we can set in the diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index ebb359ebb261c1..d90c0d5df3b37e 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1878,6 
+1878,7 @@ struct xhci_hcd { #define XHCI_RENESAS_FW_QUIRK BIT_ULL(36) #define XHCI_SKIP_PHY_INIT BIT_ULL(37) #define XHCI_DISABLE_SPARSE BIT_ULL(38) +#define XHCI_SG_TRB_CACHE_SIZE_QUIRK BIT_ULL(39) unsigned int num_active_eps; unsigned int limit_active_eps; diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index ba655b4af4fc2b..1c9e09138c1090 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -797,7 +797,7 @@ static int tower_probe(struct usb_interface *interface, const struct usb_device_ &get_version_reply, sizeof(get_version_reply), 1000, GFP_KERNEL); - if (!result) { + if (result) { dev_err(idev, "get version request failed: %d\n", result); retval = result; goto error; diff --git a/drivers/usb/misc/sisusbvga/Kconfig b/drivers/usb/misc/sisusbvga/Kconfig index 655d9cb0651a79..c12cdd01541024 100644 --- a/drivers/usb/misc/sisusbvga/Kconfig +++ b/drivers/usb/misc/sisusbvga/Kconfig @@ -16,7 +16,7 @@ config USB_SISUSBVGA config USB_SISUSBVGA_CON bool "Text console and mode switching support" if USB_SISUSBVGA - depends on VT + depends on VT && BROKEN select FONT_8x16 help Say Y here if you want a VGA text console via the USB dongle or diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index e3165d79b5f641..6c3d760bd4dd83 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -495,6 +495,9 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer, timeout = schedule_timeout(YUREX_WRITE_TIMEOUT); finish_wait(&dev->waitq, &wait); + /* make sure URB is idle after timeout or (spurious) CMD_ACK */ + usb_kill_urb(dev->cntl_urb); + mutex_unlock(&dev->io_mutex); if (retval < 0) { diff --git a/drivers/usb/mtu3/mtu3_debugfs.c b/drivers/usb/mtu3/mtu3_debugfs.c index fdeade6254aecc..7537bfd651af6d 100644 --- a/drivers/usb/mtu3/mtu3_debugfs.c +++ b/drivers/usb/mtu3/mtu3_debugfs.c @@ -127,7 +127,7 @@ static void mtu3_debugfs_regset(struct mtu3 *mtu, void __iomem 
*base, struct debugfs_regset32 *regset; struct mtu3_regset *mregs; - mregs = devm_kzalloc(mtu->dev, sizeof(*regset), GFP_KERNEL); + mregs = devm_kzalloc(mtu->dev, sizeof(*mregs), GFP_KERNEL); if (!mregs) return; diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 91055a191995fc..0d606fa9fdca1a 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -198,14 +197,12 @@ struct digi_port { int dp_throttle_restart; wait_queue_head_t dp_flush_wait; wait_queue_head_t dp_close_wait; /* wait queue for close */ - struct work_struct dp_wakeup_work; struct usb_serial_port *dp_port; }; /* Local Function Declarations */ -static void digi_wakeup_write_lock(struct work_struct *work); static int digi_write_oob_command(struct usb_serial_port *port, unsigned char *buf, int count, int interruptible); static int digi_write_inb_command(struct usb_serial_port *port, @@ -356,26 +353,6 @@ __releases(lock) return timeout; } - -/* - * Digi Wakeup Write - * - * Wake up port, line discipline, and tty processes sleeping - * on writes. 
- */ - -static void digi_wakeup_write_lock(struct work_struct *work) -{ - struct digi_port *priv = - container_of(work, struct digi_port, dp_wakeup_work); - struct usb_serial_port *port = priv->dp_port; - unsigned long flags; - - spin_lock_irqsave(&priv->dp_port_lock, flags); - tty_port_tty_wakeup(&port->port); - spin_unlock_irqrestore(&priv->dp_port_lock, flags); -} - /* * Digi Write OOB Command * @@ -986,6 +963,7 @@ static void digi_write_bulk_callback(struct urb *urb) unsigned long flags; int ret = 0; int status = urb->status; + bool wakeup; /* port and serial sanity check */ if (port == NULL || (priv = usb_get_serial_port_data(port)) == NULL) { @@ -1012,6 +990,7 @@ static void digi_write_bulk_callback(struct urb *urb) } /* try to send any buffered data on this port */ + wakeup = true; spin_lock_irqsave(&priv->dp_port_lock, flags); priv->dp_write_urb_in_use = 0; if (priv->dp_out_buf_len > 0) { @@ -1027,19 +1006,18 @@ static void digi_write_bulk_callback(struct urb *urb) if (ret == 0) { priv->dp_write_urb_in_use = 1; priv->dp_out_buf_len = 0; + wakeup = false; } } - /* wake up processes sleeping on writes immediately */ - tty_port_tty_wakeup(&port->port); - /* also queue up a wakeup at scheduler time, in case we */ - /* lost the race in write_chan(). 
*/ - schedule_work(&priv->dp_wakeup_work); - spin_unlock_irqrestore(&priv->dp_port_lock, flags); + if (ret && ret != -EPERM) dev_err_console(port, "%s: usb_submit_urb failed, ret=%d, port=%d\n", __func__, ret, priv->dp_port_num); + + if (wakeup) + tty_port_tty_wakeup(&port->port); } static int digi_write_room(struct tty_struct *tty) @@ -1239,7 +1217,6 @@ static int digi_port_init(struct usb_serial_port *port, unsigned port_num) init_waitqueue_head(&priv->dp_transmit_idle_wait); init_waitqueue_head(&priv->dp_flush_wait); init_waitqueue_head(&priv->dp_close_wait); - INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock); priv->dp_port = port; init_waitqueue_head(&port->write_wait); @@ -1508,13 +1485,14 @@ static int digi_read_oob_callback(struct urb *urb) rts = C_CRTSCTS(tty); if (tty && opcode == DIGI_CMD_READ_INPUT_SIGNALS) { + bool wakeup = false; + spin_lock_irqsave(&priv->dp_port_lock, flags); /* convert from digi flags to termiox flags */ if (val & DIGI_READ_INPUT_SIGNALS_CTS) { priv->dp_modem_signals |= TIOCM_CTS; - /* port must be open to use tty struct */ if (rts) - tty_port_tty_wakeup(&port->port); + wakeup = true; } else { priv->dp_modem_signals &= ~TIOCM_CTS; /* port must be open to use tty struct */ @@ -1533,6 +1511,9 @@ static int digi_read_oob_callback(struct urb *urb) priv->dp_modem_signals &= ~TIOCM_CD; spin_unlock_irqrestore(&priv->dp_port_lock, flags); + + if (wakeup) + tty_port_tty_wakeup(&port->port); } else if (opcode == DIGI_CMD_TRANSMIT_IDLE) { spin_lock_irqsave(&priv->dp_port_lock, flags); priv->dp_transmit_idle = 1; diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index b4ba79123d9dae..c14205190e7a79 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -532,23 +532,29 @@ static int iuu_uart_flush(struct usb_serial_port *port) struct device *dev = &port->dev; int i; int status; - u8 rxcmd = IUU_UART_RX; + u8 *rxcmd; struct iuu_private *priv = usb_get_serial_port_data(port); 
if (iuu_led(port, 0xF000, 0, 0, 0xFF) < 0) return -EIO; + rxcmd = kmalloc(1, GFP_KERNEL); + if (!rxcmd) + return -ENOMEM; + + rxcmd[0] = IUU_UART_RX; + for (i = 0; i < 2; i++) { - status = bulk_immediate(port, &rxcmd, 1); + status = bulk_immediate(port, rxcmd, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_write error\n", __func__); - return status; + goto out_free; } status = read_immediate(port, &priv->len, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); - return status; + goto out_free; } if (priv->len > 0) { @@ -556,12 +562,16 @@ static int iuu_uart_flush(struct usb_serial_port *port) status = read_immediate(port, priv->buf, priv->len); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); - return status; + goto out_free; } } } dev_dbg(dev, "%s - uart_flush_read OK!\n", __func__); iuu_led(port, 0, 0xF000, 0, 0xFF); + +out_free: + kfree(rxcmd); + return status; } diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index c1333919716b67..aec32bf06e018c 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -40,11 +40,12 @@ #define DRIVER_AUTHOR "Brian Warner " #define DRIVER_DESC "USB Keyspan PDA Converter driver" +#define KEYSPAN_TX_THRESHOLD 16 + struct keyspan_pda_private { int tx_room; int tx_throttled; - struct work_struct wakeup_work; - struct work_struct unthrottle_work; + struct work_struct unthrottle_work; struct usb_serial *serial; struct usb_serial_port *port; }; @@ -97,15 +98,6 @@ static const struct usb_device_id id_table_fake_xircom[] = { }; #endif -static void keyspan_pda_wakeup_write(struct work_struct *work) -{ - struct keyspan_pda_private *priv = - container_of(work, struct keyspan_pda_private, wakeup_work); - struct usb_serial_port *port = priv->port; - - tty_port_tty_wakeup(&port->port); -} - static void keyspan_pda_request_unthrottle(struct work_struct *work) { struct 
keyspan_pda_private *priv = @@ -120,7 +112,7 @@ static void keyspan_pda_request_unthrottle(struct work_struct *work) 7, /* request_unthrottle */ USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, - 16, /* value: threshold */ + KEYSPAN_TX_THRESHOLD, 0, /* index */ NULL, 0, @@ -139,6 +131,8 @@ static void keyspan_pda_rx_interrupt(struct urb *urb) int retval; int status = urb->status; struct keyspan_pda_private *priv; + unsigned long flags; + priv = usb_get_serial_port_data(port); switch (status) { @@ -172,18 +166,21 @@ static void keyspan_pda_rx_interrupt(struct urb *urb) break; case 1: /* status interrupt */ - if (len < 3) { + if (len < 2) { dev_warn(&port->dev, "short interrupt message received\n"); break; } - dev_dbg(&port->dev, "rx int, d1=%d, d2=%d\n", data[1], data[2]); + dev_dbg(&port->dev, "rx int, d1=%d\n", data[1]); switch (data[1]) { case 1: /* modemline change */ break; case 2: /* tx unthrottle interrupt */ + spin_lock_irqsave(&port->lock, flags); priv->tx_throttled = 0; + priv->tx_room = max(priv->tx_room, KEYSPAN_TX_THRESHOLD); + spin_unlock_irqrestore(&port->lock, flags); /* queue up a wakeup at scheduler time */ - schedule_work(&priv->wakeup_work); + usb_serial_port_softint(port); break; default: break; @@ -443,6 +440,7 @@ static int keyspan_pda_write(struct tty_struct *tty, int request_unthrottle = 0; int rc = 0; struct keyspan_pda_private *priv; + unsigned long flags; priv = usb_get_serial_port_data(port); /* guess how much room is left in the device's ring buffer, and if we @@ -462,13 +460,13 @@ static int keyspan_pda_write(struct tty_struct *tty, the TX urb is in-flight (wait until it completes) the device is full (wait until it says there is room) */ - spin_lock_bh(&port->lock); + spin_lock_irqsave(&port->lock, flags); if (!test_bit(0, &port->write_urbs_free) || priv->tx_throttled) { - spin_unlock_bh(&port->lock); + spin_unlock_irqrestore(&port->lock, flags); return 0; } clear_bit(0, &port->write_urbs_free); - spin_unlock_bh(&port->lock); + 
spin_unlock_irqrestore(&port->lock, flags); /* At this point the URB is in our control, nobody else can submit it again (the only sudden transition was the one from EINPROGRESS to @@ -514,7 +512,8 @@ static int keyspan_pda_write(struct tty_struct *tty, goto exit; } } - if (count > priv->tx_room) { + + if (count >= priv->tx_room) { /* we're about to completely fill the Tx buffer, so we'll be throttled afterwards. */ count = priv->tx_room; @@ -547,7 +546,7 @@ static int keyspan_pda_write(struct tty_struct *tty, rc = count; exit: - if (rc < 0) + if (rc <= 0) set_bit(0, &port->write_urbs_free); return rc; } @@ -556,27 +555,28 @@ static int keyspan_pda_write(struct tty_struct *tty, static void keyspan_pda_write_bulk_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; - struct keyspan_pda_private *priv; set_bit(0, &port->write_urbs_free); - priv = usb_get_serial_port_data(port); /* queue up a wakeup at scheduler time */ - schedule_work(&priv->wakeup_work); + usb_serial_port_softint(port); } static int keyspan_pda_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; - struct keyspan_pda_private *priv; - priv = usb_get_serial_port_data(port); - /* used by n_tty.c for processing of tabs and such. Giving it our - conservative guess is probably good enough, but needs testing by - running a console through the device. 
*/ - return priv->tx_room; -} + struct keyspan_pda_private *priv = usb_get_serial_port_data(port); + unsigned long flags; + int room = 0; + spin_lock_irqsave(&port->lock, flags); + if (test_bit(0, &port->write_urbs_free) && !priv->tx_throttled) + room = priv->tx_room; + spin_unlock_irqrestore(&port->lock, flags); + + return room; +} static int keyspan_pda_chars_in_buffer(struct tty_struct *tty) { @@ -656,8 +656,12 @@ static int keyspan_pda_open(struct tty_struct *tty, } static void keyspan_pda_close(struct usb_serial_port *port) { + struct keyspan_pda_private *priv = usb_get_serial_port_data(port); + usb_kill_urb(port->write_urb); usb_kill_urb(port->interrupt_in_urb); + + cancel_work_sync(&priv->unthrottle_work); } @@ -714,7 +718,6 @@ static int keyspan_pda_port_probe(struct usb_serial_port *port) if (!priv) return -ENOMEM; - INIT_WORK(&priv->wakeup_work, keyspan_pda_wakeup_write); INIT_WORK(&priv->unthrottle_work, keyspan_pda_request_unthrottle); priv->serial = port->serial; priv->port = port; diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 5eed1078fac87b..5a5d2a95070ed7 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -639,6 +639,8 @@ static void parport_mos7715_restore_state(struct parport *pp, spin_unlock(&release_lock); return; } + mos_parport->shadowDCR = s->u.pc.ctr; + mos_parport->shadowECR = s->u.pc.ecr; write_parport_reg_nonblock(mos_parport, MOS7720_DCR, mos_parport->shadowDCR); write_parport_reg_nonblock(mos_parport, MOS7720_ECR, diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 56d6f6d83bd788..3fe959104311b4 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -563,6 +563,9 @@ static void option_instat_callback(struct urb *urb); /* Device flags */ +/* Highest interface number which can be used with NCTRL() and RSVD() */ +#define FLAG_IFNUM_MAX 7 + /* Interface does not support modem-control requests */ #define NCTRL(ifnum) ((BIT(ifnum) & 
0xff) << 8) @@ -1114,6 +1117,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0xff, 0x30) }, /* EM160R-GL */ + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), @@ -2054,6 +2059,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ @@ -2101,6 +2107,14 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, option_ids); +static bool iface_is_reserved(unsigned long device_flags, u8 ifnum) +{ + if (ifnum > FLAG_IFNUM_MAX) + return false; + + return device_flags & RSVD(ifnum); +} + static int option_probe(struct usb_serial *serial, const struct usb_device_id *id) { @@ -2117,7 +2131,7 @@ static int option_probe(struct usb_serial *serial, * the same class/subclass/protocol as the serial interfaces. Look at * the Windows driver .INF files for reserved interface numbers. 
*/ - if (device_flags & RSVD(iface_desc->bInterfaceNumber)) + if (iface_is_reserved(device_flags, iface_desc->bInterfaceNumber)) return -ENODEV; /* @@ -2133,6 +2147,14 @@ static int option_probe(struct usb_serial *serial, return 0; } +static bool iface_no_modem_control(unsigned long device_flags, u8 ifnum) +{ + if (ifnum > FLAG_IFNUM_MAX) + return false; + + return device_flags & NCTRL(ifnum); +} + static int option_attach(struct usb_serial *serial) { struct usb_interface_descriptor *iface_desc; @@ -2148,7 +2170,7 @@ static int option_attach(struct usb_serial *serial) iface_desc = &serial->interface->cur_altsetting->desc; - if (!(device_flags & NCTRL(iface_desc->bInterfaceNumber))) + if (!iface_no_modem_control(device_flags, iface_desc->bInterfaceNumber)) data->use_send_setup = 1; if (device_flags & ZLP) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 652d6d6f1f3652..ff6f41e7e06835 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -867,6 +867,9 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_NO_READ_CAPACITY_16) sdev->no_read_capacity_16 = 1; + /* Some disks cannot handle WRITE_SAME */ + if (devinfo->flags & US_FL_NO_SAME) + sdev->no_write_same = 1; /* * Some disks return the total number of blocks in response * to READ CAPACITY rather than the highest block number. diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 711ab240058c7d..f9677a5ec31b28 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -35,12 +35,15 @@ UNUSUAL_DEV(0x054c, 0x087d, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_OPCODES), -/* Reported-by: Julian Groß */ +/* + * Initially Reported-by: Julian Groß + * Further reports David C. 
Partridge + */ UNUSUAL_DEV(0x059f, 0x105f, 0x0000, 0x9999, "LaCie", "2Big Quadra USB3", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_NO_REPORT_OPCODES), + US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME), /* * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI @@ -87,6 +90,13 @@ UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA), +/* Reported-by: Thinh Nguyen */ +UNUSUAL_DEV(0x154b, 0xf00b, 0x0000, 0x9999, + "PNY", + "Pro Elite SSD", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + /* Reported-by: Thinh Nguyen */ UNUSUAL_DEV(0x154b, 0xf00d, 0x0000, 0x9999, "PNY", diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 94a64729dc27d6..90aa9c12ffac5f 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -541,6 +541,9 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) case 'j': f |= US_FL_NO_REPORT_LUNS; break; + case 'k': + f |= US_FL_NO_SAME; + break; case 'l': f |= US_FL_NOT_LOCKABLE; break; diff --git a/drivers/usb/typec/altmodes/Kconfig b/drivers/usb/typec/altmodes/Kconfig index 187690fd1a5bd3..60d375e9c3c7cb 100644 --- a/drivers/usb/typec/altmodes/Kconfig +++ b/drivers/usb/typec/altmodes/Kconfig @@ -20,6 +20,6 @@ config TYPEC_NVIDIA_ALTMODE to enable support for VirtualLink devices with NVIDIA GPUs. To compile this driver as a module, choose M here: the - module will be called typec_displayport. + module will be called typec_nvidia. 
endmenu diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index d7f63b74c6b14b..17896bd87fc3f4 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -202,10 +202,21 @@ static int pmc_usb_mux_dp_hpd(struct pmc_usb_port *port, struct typec_displayport_data *dp) { u8 msg[2] = { }; + int ret; msg[0] = PMC_USB_DP_HPD; msg[0] |= port->usb3_port << PMC_USB_MSG_USB3_PORT_SHIFT; + /* Configure HPD first if HPD,IRQ comes together */ + if (!IOM_PORT_HPD_ASSERTED(port->iom_status) && + dp->status & DP_STATUS_IRQ_HPD && + dp->status & DP_STATUS_HPD_STATE) { + msg[1] = PMC_USB_DP_HPD_LVL; + ret = pmc_usb_command(port, msg, sizeof(msg)); + if (ret) + return ret; + } + if (dp->status & DP_STATUS_IRQ_HPD) msg[1] = PMC_USB_DP_HPD_IRQ; diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 66cde5e5f7964a..3209b5ddd30c97 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -396,6 +396,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, default: usbip_dbg_vhci_rh(" ClearPortFeature: default %x\n", wValue); + if (wValue >= 32) + goto error; vhci_hcd->port_status[rhport] &= ~(1 << wValue); break; } diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 1fa6fcac82992f..81b932f72e1037 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -464,6 +464,11 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) { mlx5_cq_set_ci(&mvq->cq.mcq); + + /* make sure CQ cosumer update is visible to the hardware before updating + * RX doorbell record. 
+ */ + dma_wmb(); rx_post(&mvq->vqqp, num); if (mvq->event_cb.callback) mvq->event_cb.callback(mvq->event_cb.private); diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e6190173482c7e..706de3ef94bbff 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -161,8 +161,6 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev) int i; struct vfio_pci_dummy_resource *dummy_res; - INIT_LIST_HEAD(&vdev->dummy_resources_list); - for (i = 0; i < PCI_STD_NUM_BARS; i++) { int bar = i + PCI_STD_RESOURCES; @@ -1635,8 +1633,8 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) mutex_unlock(&vdev->vma_lock); - if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, vma->vm_page_prot)) + if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot)) ret = VM_FAULT_SIGBUS; up_out: @@ -1966,6 +1964,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mutex_init(&vdev->igate); spin_lock_init(&vdev->irqlock); mutex_init(&vdev->ioeventfds_lock); + INIT_LIST_HEAD(&vdev->dummy_resources_list); INIT_LIST_HEAD(&vdev->ioeventfds_list); mutex_init(&vdev->vma_lock); INIT_LIST_HEAD(&vdev->vma_list); diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c index 65c61710c0e9a3..9adcf6a8f88857 100644 --- a/drivers/vfio/pci/vfio_pci_nvlink2.c +++ b/drivers/vfio/pci/vfio_pci_nvlink2.c @@ -231,7 +231,7 @@ int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev) return -EINVAL; if (of_property_read_u32(npu_node, "memory-region", &mem_phandle)) - return -EINVAL; + return -ENODEV; mem_node = of_find_node_by_phandle(mem_phandle); if (!mem_node) @@ -393,7 +393,7 @@ int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev) int ret; struct vfio_pci_npu2_data *data; struct device_node *nvlink_dn; - u32 nvlink_index = 0; + u32 nvlink_index = 0, mem_phandle = 0; struct pci_dev *npdev = 
vdev->pdev; struct device_node *npu_node = pci_device_to_OF_node(npdev); struct pci_controller *hose = pci_bus_to_host(npdev->bus); @@ -408,6 +408,9 @@ int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev) if (!pnv_pci_get_gpu_dev(vdev->pdev)) return -ENODEV; + if (of_property_read_u32(npu_node, "memory-region", &mem_phandle)) + return -ENODEV; + /* * NPU2 normally has 8 ATSD registers (for concurrency) and 6 links * so we can allocate one register per link, using nvlink index as diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 531a00d703cdf8..c8784dfafdd733 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -863,6 +863,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) size_t len, total_len = 0; int err; struct vhost_net_ubuf_ref *ubufs; + struct ubuf_info *ubuf; bool zcopy_used; int sent_pkts = 0; @@ -895,9 +896,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { - struct ubuf_info *ubuf; ubuf = nvq->ubuf_info + nvq->upend_idx; - vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; ubuf->callback = vhost_zerocopy_callback; @@ -927,7 +926,8 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { if (zcopy_used) { - vhost_net_ubuf_put(ubufs); + if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS) + vhost_net_ubuf_put(ubufs); nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV; } diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 6ff8a509669154..4ce9f00ae10e84 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1643,7 +1643,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (!vhost_vq_is_setup(vq)) continue; - if (vhost_scsi_setup_vq_cmds(vq, vq->num)) + ret = vhost_scsi_setup_vq_cmds(vq, vq->num); + if (ret) goto 
destroy_vq_cmds; } diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index 8c1d47e52b1a6b..355b6120dc4f0d 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++ b/drivers/video/fbdev/atmel_lcdfb.c @@ -987,8 +987,8 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo) } INIT_LIST_HEAD(&pdata->pwr_gpios); - ret = -ENOMEM; for (i = 0; i < gpiod_count(dev, "atmel,power-control"); i++) { + ret = -ENOMEM; gpiod = devm_gpiod_get_index(dev, "atmel,power-control", i, GPIOD_ASIS); if (IS_ERR(gpiod)) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index cef437817b0dc6..8d1ae973041aeb 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -1033,7 +1033,7 @@ static void fbcon_init(struct vc_data *vc, int init) struct vc_data *svc = *default_mode; struct fbcon_display *t, *p = &fb_display[vc->vc_num]; int logo = 1, new_rows, new_cols, rows, cols, charcnt = 256; - int cap, ret; + int ret; if (WARN_ON(info_idx == -1)) return; @@ -1042,7 +1042,6 @@ static void fbcon_init(struct vc_data *vc, int init) con2fb_map[vc->vc_num] = info_idx; info = registered_fb[con2fb_map[vc->vc_num]]; - cap = info->flags; if (logo_shown < 0 && console_loglevel <= CONSOLE_LOGLEVEL_QUIET) logo_shown = FBCON_LOGO_DONTSHOW; @@ -1147,11 +1146,13 @@ static void fbcon_init(struct vc_data *vc, int init) ops->graphics = 0; - if ((cap & FBINFO_HWACCEL_COPYAREA) && - !(cap & FBINFO_HWACCEL_DISABLED)) - p->scrollmode = SCROLL_MOVE; - else /* default to something safe */ - p->scrollmode = SCROLL_REDRAW; + /* + * No more hw acceleration for fbcon. + * + * FIXME: Garbage collect all the now dead code after sufficient time + * has passed. 
+ */ + p->scrollmode = SCROLL_REDRAW; /* * ++guenther: console.c:vc_allocate() relies on initializing @@ -1961,45 +1962,15 @@ static void updatescrollmode(struct fbcon_display *p, { struct fbcon_ops *ops = info->fbcon_par; int fh = vc->vc_font.height; - int cap = info->flags; - u16 t = 0; - int ypan = FBCON_SWAP(ops->rotate, info->fix.ypanstep, - info->fix.xpanstep); - int ywrap = FBCON_SWAP(ops->rotate, info->fix.ywrapstep, t); int yres = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); int vyres = FBCON_SWAP(ops->rotate, info->var.yres_virtual, info->var.xres_virtual); - int good_pan = (cap & FBINFO_HWACCEL_YPAN) && - divides(ypan, vc->vc_font.height) && vyres > yres; - int good_wrap = (cap & FBINFO_HWACCEL_YWRAP) && - divides(ywrap, vc->vc_font.height) && - divides(vc->vc_font.height, vyres) && - divides(vc->vc_font.height, yres); - int reading_fast = cap & FBINFO_READS_FAST; - int fast_copyarea = (cap & FBINFO_HWACCEL_COPYAREA) && - !(cap & FBINFO_HWACCEL_DISABLED); - int fast_imageblit = (cap & FBINFO_HWACCEL_IMAGEBLIT) && - !(cap & FBINFO_HWACCEL_DISABLED); p->vrows = vyres/fh; if (yres > (fh * (vc->vc_rows + 1))) p->vrows -= (yres - (fh * vc->vc_rows)) / fh; if ((yres % fh) && (vyres % fh < yres % fh)) p->vrows--; - - if (good_wrap || good_pan) { - if (reading_fast || fast_copyarea) - p->scrollmode = good_wrap ? - SCROLL_WRAP_MOVE : SCROLL_PAN_MOVE; - else - p->scrollmode = good_wrap ? 
SCROLL_REDRAW : - SCROLL_PAN_REDRAW; - } else { - if (reading_fast || (fast_copyarea && !fast_imageblit)) - p->scrollmode = SCROLL_MOVE; - else - p->scrollmode = SCROLL_REDRAW; - } } #define PITCH(w) (((w) + 7) >> 3) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index becc776979602b..71e16b53e9c185 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1608,7 +1608,6 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->num_added = 0; vq->packed_ring = true; vq->use_dma_api = vring_use_dma_api(vdev); - list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; vq->last_add_time_valid = false; @@ -1669,6 +1668,7 @@ static struct virtqueue *vring_create_virtqueue_packed( cpu_to_le16(vq->packed.event_flags_shadow); } + list_add_tail(&vq->vq.list, &vdev->vqs); return &vq->vq; err_desc_extra: @@ -1676,9 +1676,9 @@ static struct virtqueue *vring_create_virtqueue_packed( err_desc_state: kfree(vq); err_vq: - vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); + vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); err_device: - vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); + vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); err_driver: vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); err_ring: @@ -2085,7 +2085,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->last_used_idx = 0; vq->num_added = 0; vq->use_dma_api = vring_use_dma_api(vdev); - list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; vq->last_add_time_valid = false; @@ -2127,6 +2126,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, memset(vq->split.desc_state, 0, vring.num * sizeof(struct vring_desc_state_split)); + list_add_tail(&vq->vq.list, &vdev->vqs); return &vq->vq; } EXPORT_SYMBOL_GPL(__vring_new_virtqueue); diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 
fd7968635e6dfc..db935d6b10c277 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -386,6 +386,7 @@ config ARM_SBSA_WATCHDOG config ARMADA_37XX_WATCHDOG tristate "Armada 37xx watchdog" depends on ARCH_MVEBU || COMPILE_TEST + depends on HAS_IOMEM select MFD_SYSCON select WATCHDOG_CORE help @@ -631,7 +632,7 @@ config SUNXI_WATCHDOG config COH901327_WATCHDOG bool "ST-Ericsson COH 901 327 watchdog" - depends on ARCH_U300 || (ARM && COMPILE_TEST) + depends on ARCH_U300 || (ARM && COMMON_CLK && COMPILE_TEST) default y if MACH_U300 select WATCHDOG_CORE help @@ -789,6 +790,7 @@ config MOXART_WDT config SIRFSOC_WATCHDOG tristate "SiRFSOC watchdog" + depends on HAS_IOMEM depends on ARCH_SIRF || COMPILE_TEST select WATCHDOG_CORE default y diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index ab7465d186fdaa..cdf754233e53da 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -148,7 +148,7 @@ static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action, */ wmb(); - msleep(150); + mdelay(150); return 0; } diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index 836319cbaca9d7..359302f71f7efe 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -227,8 +227,10 @@ static int rti_wdt_probe(struct platform_device *pdev) pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); - if (ret) + if (ret) { + pm_runtime_put_noidle(dev); return dev_err_probe(dev, ret, "runtime pm failed\n"); + } platform_set_drvdata(pdev, wdt); diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c index 65cb55f3916fce..b9b1daa9e2a4c7 100644 --- a/drivers/watchdog/sprd_wdt.c +++ b/drivers/watchdog/sprd_wdt.c @@ -108,18 +108,6 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 timeout, u32 tmr_step = timeout * SPRD_WDT_CNT_STEP; u32 prtmr_step = pretimeout * SPRD_WDT_CNT_STEP; - sprd_wdt_unlock(wdt->base); - writel_relaxed((tmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) & 
- SPRD_WDT_LOW_VALUE_MASK, wdt->base + SPRD_WDT_LOAD_HIGH); - writel_relaxed((tmr_step & SPRD_WDT_LOW_VALUE_MASK), - wdt->base + SPRD_WDT_LOAD_LOW); - writel_relaxed((prtmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) & - SPRD_WDT_LOW_VALUE_MASK, - wdt->base + SPRD_WDT_IRQ_LOAD_HIGH); - writel_relaxed(prtmr_step & SPRD_WDT_LOW_VALUE_MASK, - wdt->base + SPRD_WDT_IRQ_LOAD_LOW); - sprd_wdt_lock(wdt->base); - /* * Waiting the load value operation done, * it needs two or three RTC clock cycles. @@ -134,6 +122,19 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 timeout, if (delay_cnt >= SPRD_WDT_LOAD_TIMEOUT) return -EBUSY; + + sprd_wdt_unlock(wdt->base); + writel_relaxed((tmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) & + SPRD_WDT_LOW_VALUE_MASK, wdt->base + SPRD_WDT_LOAD_HIGH); + writel_relaxed((tmr_step & SPRD_WDT_LOW_VALUE_MASK), + wdt->base + SPRD_WDT_LOAD_LOW); + writel_relaxed((prtmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) & + SPRD_WDT_LOW_VALUE_MASK, + wdt->base + SPRD_WDT_IRQ_LOAD_HIGH); + writel_relaxed(prtmr_step & SPRD_WDT_LOW_VALUE_MASK, + wdt->base + SPRD_WDT_IRQ_LOAD_LOW); + sprd_wdt_lock(wdt->base); + return 0; } @@ -345,15 +346,10 @@ static int __maybe_unused sprd_wdt_pm_resume(struct device *dev) if (ret) return ret; - if (watchdog_active(&wdt->wdd)) { + if (watchdog_active(&wdt->wdd)) ret = sprd_wdt_start(&wdt->wdd); - if (ret) { - sprd_wdt_disable(wdt); - return ret; - } - } - return 0; + return ret; } static const struct dev_pm_ops sprd_wdt_pm_ops = { diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 4238447578128a..0e9a99559609c4 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -267,15 +267,19 @@ static int __watchdog_register_device(struct watchdog_device *wdd) } if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { - wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; - - ret = register_reboot_notifier(&wdd->reboot_nb); - if (ret) { - pr_err("watchdog%d: Cannot register reboot notifier 
(%d)\n", - wdd->id, ret); - watchdog_dev_unregister(wdd); - ida_simple_remove(&watchdog_ida, id); - return ret; + if (!wdd->ops->stop) + pr_warn("watchdog%d: stop_on_reboot not supported\n", wdd->id); + else { + wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; + + ret = register_reboot_notifier(&wdd->reboot_nb); + if (ret) { + pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", + wdd->id, ret); + watchdog_dev_unregister(wdd); + ida_simple_remove(&watchdog_ida, id); + return ret; + } } } diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6038c4c35db5ad..bbebe248b72647 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -2010,16 +2010,6 @@ static struct irq_chip xen_percpu_chip __read_mostly = { .irq_ack = ack_dynirq, }; -int xen_set_callback_via(uint64_t via) -{ - struct xen_hvm_param a; - a.domid = DOMID_SELF; - a.index = HVM_PARAM_CALLBACK_IRQ; - a.value = via; - return HYPERVISOR_hvm_op(HVMOP_set_param, &a); -} -EXPORT_SYMBOL_GPL(xen_set_callback_via); - #ifdef CONFIG_XEN_PVHVM /* Vector callbacks are better than PCI interrupts to receive event * channel notifications because we can receive vector callbacks on any diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index dd911e1ff782cc..9db557b76511b2 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -149,7 +149,6 @@ static int platform_pci_probe(struct pci_dev *pdev, ret = gnttab_init(); if (ret) goto grant_out; - xenbus_probe(NULL); return 0; grant_out: gnttab_free_auto_xlat_frames(); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index b0c73c58f9874a..720a7b7abd46d6 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -717,14 +717,15 @@ static long privcmd_ioctl_restrict(struct file *file, void __user *udata) return 0; } -static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) +static long privcmd_ioctl_mmap_resource(struct file 
*file, + struct privcmd_mmap_resource __user *udata) { struct privcmd_data *data = file->private_data; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct privcmd_mmap_resource kdata; xen_pfn_t *pfns = NULL; - struct xen_mem_acquire_resource xdata; + struct xen_mem_acquire_resource xdata = { }; int rc; if (copy_from_user(&kdata, udata, sizeof(kdata))) @@ -734,6 +735,22 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) if (data->domid != DOMID_INVALID && data->domid != kdata.dom) return -EPERM; + /* Both fields must be set or unset */ + if (!!kdata.addr != !!kdata.num) + return -EINVAL; + + xdata.domid = kdata.dom; + xdata.type = kdata.type; + xdata.id = kdata.id; + + if (!kdata.addr && !kdata.num) { + /* Query the size of the resource. */ + rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata); + if (rc) + return rc; + return __put_user(xdata.nr_frames, &udata->num); + } + mmap_write_lock(mm); vma = find_vma(mm, kdata.addr); @@ -768,10 +785,6 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) } else vma->vm_private_data = PRIV_VMA_LOCKED; - memset(&xdata, 0, sizeof(xdata)); - xdata.domid = kdata.dom; - xdata.type = kdata.type; - xdata.id = kdata.id; xdata.frame = kdata.idx; xdata.nr_frames = kdata.num; set_xen_guest_handle(xdata.frame_list, pfns); diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 4b99ec3dec58ab..e7c692cfb2cf84 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -689,7 +689,7 @@ static int xen_pcibk_xenbus_probe(struct xenbus_device *dev, /* watch the backend node for backend configuration information */ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, - xen_pcibk_be_watch); + NULL, xen_pcibk_be_watch); if (err) goto out; diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 5f5b8a7d5b80b9..dc153733541441 100644 --- a/drivers/xen/xenbus/xenbus.h +++ 
b/drivers/xen/xenbus/xenbus.h @@ -44,6 +44,8 @@ struct xen_bus_type { int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); int (*probe)(struct xen_bus_type *bus, const char *type, const char *dir); + bool (*otherend_will_handle)(struct xenbus_watch *watch, + const char *path, const char *token); void (*otherend_changed)(struct xenbus_watch *watch, const char *path, const char *token); struct bus_type bus; @@ -113,6 +115,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); int xenbus_probe_devices(struct xen_bus_type *bus); +void xenbus_probe(void); void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index fd80e318b99cc7..0cd728961fce9e 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -127,18 +127,22 @@ EXPORT_SYMBOL_GPL(xenbus_strstate); */ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *)) { int err; watch->node = path; + watch->will_handle = will_handle; watch->callback = callback; err = register_xenbus_watch(watch); if (err) { watch->node = NULL; + watch->will_handle = NULL; watch->callback = NULL; xenbus_dev_fatal(dev, err, "adding watch on %s", path); } @@ -165,6 +169,8 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path); */ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *), const char *pathfmt, ...) 
@@ -181,7 +187,7 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); return -ENOMEM; } - err = xenbus_watch_path(dev, path, watch, callback); + err = xenbus_watch_path(dev, path, watch, will_handle, callback); if (err) kfree(path); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index eb5151fc8efab0..e5fda0256feb3d 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -57,16 +57,8 @@ DEFINE_MUTEX(xs_response_mutex); static int xenbus_irq; static struct task_struct *xenbus_task; -static DECLARE_WORK(probe_work, xenbus_probe); - - static irqreturn_t wake_waiting(int irq, void *unused) { - if (unlikely(xenstored_ready == 0)) { - xenstored_ready = 1; - schedule_work(&probe_work); - } - wake_up(&xb_waitq); return IRQ_HANDLED; } diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 38725d97d90933..18ffd0551b5429 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -136,6 +136,7 @@ static int watch_otherend(struct xenbus_device *dev) container_of(dev->dev.bus, struct xen_bus_type, bus); return xenbus_watch_pathfmt(dev, &dev->otherend_watch, + bus->otherend_will_handle, bus->otherend_changed, "%s/%s", dev->otherend, "state"); } @@ -682,29 +683,107 @@ void unregister_xenstore_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); -void xenbus_probe(struct work_struct *unused) +void xenbus_probe(void) { xenstored_ready = 1; + /* + * In the HVM case, xenbus_init() deferred its call to + * xs_init() in case callbacks were not operational yet. + * So do it now. 
+ */ + if (xen_store_domain_type == XS_HVM) + xs_init(); + /* Notify others that xenstore is up */ blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } -EXPORT_SYMBOL_GPL(xenbus_probe); -static int __init xenbus_probe_initcall(void) +/* + * Returns true when XenStore init must be deferred in order to + * allow the PCI platform device to be initialised, before we + * can actually have event channel interrupts working. + */ +static bool xs_hvm_defer_init_for_callback(void) { - if (!xen_domain()) - return -ENODEV; +#ifdef CONFIG_XEN_PVHVM + return xen_store_domain_type == XS_HVM && + !xen_have_vector_callback; +#else + return false; +#endif +} - if (xen_initial_domain() || xen_hvm_domain()) - return 0; +static int xenbus_probe_thread(void *unused) +{ + DEFINE_WAIT(w); - xenbus_probe(NULL); + /* + * We actually just want to wait for *any* trigger of xb_waitq, + * and run xenbus_probe() the moment it occurs. + */ + prepare_to_wait(&xb_waitq, &w, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&xb_waitq, &w); + + DPRINTK("probing"); + xenbus_probe(); return 0; } +static int __init xenbus_probe_initcall(void) +{ + /* + * Probe XenBus here in the XS_PV case, and also XS_HVM unless we + * need to wait for the platform PCI device to come up. + */ + if (xen_store_domain_type == XS_PV || + (xen_store_domain_type == XS_HVM && + !xs_hvm_defer_init_for_callback())) + xenbus_probe(); + + /* + * For XS_LOCAL, spawn a thread which will wait for xenstored + * or a xenstore-stubdom to be started, then probe. It will be + * triggered when communication starts happening, by waiting + * on xb_waitq. 
+ */ + if (xen_store_domain_type == XS_LOCAL) { + struct task_struct *probe_task; + + probe_task = kthread_run(xenbus_probe_thread, NULL, + "xenbus_probe"); + if (IS_ERR(probe_task)) + return PTR_ERR(probe_task); + } + return 0; +} device_initcall(xenbus_probe_initcall); +int xen_set_callback_via(uint64_t via) +{ + struct xen_hvm_param a; + int ret; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + a.value = via; + + ret = HYPERVISOR_hvm_op(HVMOP_set_param, &a); + if (ret) + return ret; + + /* + * If xenbus_probe_initcall() deferred the xenbus_probe() + * due to the callback not functioning yet, we can do it now. + */ + if (!xenstored_ready && xs_hvm_defer_init_for_callback()) + xenbus_probe(); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_set_callback_via); + /* Set up event channel for xenstored which is run as a local process * (this is normally used only in dom0) */ @@ -817,11 +896,17 @@ static int __init xenbus_init(void) break; } - /* Initialize the interface to xenstore. */ - err = xs_init(); - if (err) { - pr_warn("Error initializing xenstore comms: %i\n", err); - goto out_error; + /* + * HVM domains may not have a functional callback yet. In that + * case let xs_init() be called from xenbus_probe(), which will + * get invoked at an appropriate time. 
+ */ + if (xen_store_domain_type != XS_HVM) { + err = xs_init(); + if (err) { + pr_warn("Error initializing xenstore comms: %i\n", err); + goto out_error; + } } if ((xen_store_domain_type != XS_LOCAL) && diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 2ba699897e6dd9..5abded97e1a7e5 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -180,6 +180,12 @@ static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, return err; } +static bool frontend_will_handle(struct xenbus_watch *watch, + const char *path, const char *token) +{ + return watch->nr_pending == 0; +} + static void frontend_changed(struct xenbus_watch *watch, const char *path, const char *token) { @@ -191,6 +197,7 @@ static struct xen_bus_type xenbus_backend = { .levels = 3, /* backend/type// */ .get_bus_id = backend_bus_id, .probe = xenbus_probe_backend, + .otherend_will_handle = frontend_will_handle, .otherend_changed = frontend_changed, .bus = { .name = "xen-backend", diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3a06eb699f3330..12e02eb01f5991 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -705,9 +705,13 @@ int xs_watch_msg(struct xs_watch_event *event) spin_lock(&watches_lock); event->handle = find_watch(event->token); - if (event->handle != NULL) { + if (event->handle != NULL && + (!event->handle->will_handle || + event->handle->will_handle(event->handle, + event->path, event->token))) { spin_lock(&watch_events_lock); list_add_tail(&event->list, &watch_events); + event->handle->nr_pending++; wake_up(&watch_events_waitq); spin_unlock(&watch_events_lock); } else @@ -765,6 +769,8 @@ int register_xenbus_watch(struct xenbus_watch *watch) sprintf(token, "%lX", (long)watch); + watch->nr_pending = 0; + down_read(&xs_watch_rwsem); spin_lock(&watches_lock); @@ -814,11 +820,14 @@ void unregister_xenbus_watch(struct 
xenbus_watch *watch) /* Cancel pending watch events. */ spin_lock(&watch_events_lock); - list_for_each_entry_safe(event, tmp, &watch_events, list) { - if (event->handle != watch) - continue; - list_del(&event->list); - kfree(event); + if (watch->nr_pending) { + list_for_each_entry_safe(event, tmp, &watch_events, list) { + if (event->handle != watch) + continue; + list_del(&event->list); + kfree(event); + } + watch->nr_pending = 0; } spin_unlock(&watch_events_lock); @@ -865,7 +874,6 @@ void xs_suspend_cancel(void) static int xenwatch_thread(void *unused) { - struct list_head *ent; struct xs_watch_event *event; xenwatch_pid = current->pid; @@ -880,13 +888,15 @@ static int xenwatch_thread(void *unused) mutex_lock(&xenwatch_mutex); spin_lock(&watch_events_lock); - ent = watch_events.next; - if (ent != &watch_events) - list_del(ent); + event = list_first_entry_or_null(&watch_events, + struct xs_watch_event, list); + if (event) { + list_del(&event->list); + event->handle->nr_pending--; + } spin_unlock(&watch_events_lock); - if (ent != &watch_events) { - event = list_entry(ent, struct xs_watch_event, list); + if (event) { event->handle->callback(event->handle, event->path, event->token); kfree(event); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 3ac7611ef7ce22..fd691e4815c564 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -350,7 +350,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / sizeof(struct bfs_inode) + BFS_ROOT_INO - 1; if (info->si_lasti == BFS_MAX_LASTI) - printf("WARNING: filesystem %s was created with 512 inodes, the real maximum is 511, mounting anyway\n", s->s_id); + printf("NOTE: filesystem %s was created with 512 inodes, the real maximum is 511, mounting anyway\n", s->s_id); else if (info->si_lasti > BFS_MAX_LASTI) { printf("Impossible last inode number %lu > %d on %s\n", info->si_lasti, BFS_MAX_LASTI, s->s_id); goto out1; diff --git a/fs/block_dev.c 
b/fs/block_dev.c index 9e84b1928b9401..2ea189c1b4ffe7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -134,7 +134,15 @@ EXPORT_SYMBOL(truncate_bdev_range); static void set_init_blocksize(struct block_device *bdev) { - bdev->bd_inode->i_blkbits = blksize_bits(bdev_logical_block_size(bdev)); + unsigned int bsize = bdev_logical_block_size(bdev); + loff_t size = i_size_read(bdev->bd_inode); + + while (bsize < PAGE_SIZE) { + if (size & bsize) + break; + bsize <<= 1; + } + bdev->bd_inode->i_blkbits = blksize_bits(bsize); } int set_blocksize(struct block_device *bdev, int size) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 771a036867dc05..553b4f6ec86391 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -3124,7 +3124,7 @@ void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache, list_del_init(&lower->list); if (lower == node) node = NULL; - btrfs_backref_free_node(cache, lower); + btrfs_backref_drop_node(cache, lower); } btrfs_backref_cleanup_node(cache, node); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 3ba6f3839d3929..a2111eab614f2a 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -639,7 +639,15 @@ static noinline void caching_thread(struct btrfs_work *work) mutex_lock(&caching_ctl->mutex); down_read(&fs_info->commit_root_sem); - if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) + /* + * If we are in the transaction that populated the free space tree we + * can't actually cache from the free space tree as our commit root and + * real root are the same, so we could change the contents of the blocks + * while caching. Instead do the slow caching in this case, and after + * the transaction has committed we will be safe. 
+ */ + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags))) ret = load_free_space_tree(caching_ctl); else ret = load_extent_tree_free(caching_ctl); @@ -2687,7 +2695,8 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) * Go through delayed refs for all the stuff we've just kicked off * and then loop back (just once) */ - ret = btrfs_run_delayed_refs(trans, 0); + if (!ret) + ret = btrfs_run_delayed_refs(trans, 0); if (!ret && loops == 0) { loops++; spin_lock(&cur_trans->dirty_bgs_lock); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 92dd86bceae310..8de4bf8edb9c09 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -35,6 +35,22 @@ enum { BTRFS_INODE_IN_DELALLOC_LIST, BTRFS_INODE_HAS_PROPS, BTRFS_INODE_SNAPSHOT_FLUSH, + /* + * Set and used when logging an inode and it serves to signal that an + * inode does not have xattrs, so subsequent fsyncs can avoid searching + * for xattrs to log. This bit must be cleared whenever a xattr is added + * to an inode. + */ + BTRFS_INODE_NO_XATTRS, + /* + * Set when we are in a context where we need to start a transaction and + * have dirty pages with the respective file range locked. This is to + * ensure that when reserving space for the transaction, if we are low + * on available space and need to flush delalloc, we will not flush + * delalloc for this inode, because that could result in a deadlock (on + * the file range, inode's io_tree). 
+ */ + BTRFS_INODE_NO_DELALLOC_FLUSH, }; /* in memory btrfs inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0b29bdb251050c..30ea9780725ffe 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -146,6 +146,9 @@ enum { BTRFS_FS_STATE_DEV_REPLACING, /* The btrfs_fs_info created for self-tests */ BTRFS_FS_STATE_DUMMY_FS_INFO, + + /* Indicate that we can't trust the free space tree for caching yet */ + BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, }; #define BTRFS_BACKREF_REV_MAX 256 @@ -2593,7 +2596,6 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len, int delalloc); int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, u64 len); -void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref); @@ -3002,7 +3004,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u32 min_type); int btrfs_start_delalloc_snapshot(struct btrfs_root *root); -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr); +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, + bool in_reclaim_context); int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, unsigned int extra_bits, struct extent_state **cached_state); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 10638537b9ef30..d297804631829d 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -703,7 +703,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, * flush all outstanding I/O and inode extent mappings before the * copy operation is declared as being finished */ - ret = btrfs_start_delalloc_roots(fs_info, U64_MAX); + ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false); if (ret) { mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return ret; diff --git a/fs/btrfs/discard.c 
b/fs/btrfs/discard.c index 741c7e19c32f2c..9e1a06144e32d8 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -199,16 +199,15 @@ static struct btrfs_block_group *find_next_block_group( static struct btrfs_block_group *peek_discard_list( struct btrfs_discard_ctl *discard_ctl, enum btrfs_discard_state *discard_state, - int *discard_index) + int *discard_index, u64 now) { struct btrfs_block_group *block_group; - const u64 now = ktime_get_ns(); spin_lock(&discard_ctl->lock); again: block_group = find_next_block_group(discard_ctl, now); - if (block_group && now > block_group->discard_eligible_time) { + if (block_group && now >= block_group->discard_eligible_time) { if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED && block_group->used != 0) { if (btrfs_is_block_group_data_only(block_group)) @@ -222,12 +221,11 @@ static struct btrfs_block_group *peek_discard_list( block_group->discard_state = BTRFS_DISCARD_EXTENTS; } discard_ctl->block_group = block_group; + } + if (block_group) { *discard_state = block_group->discard_state; *discard_index = block_group->discard_index; - } else { - block_group = NULL; } - spin_unlock(&discard_ctl->lock); return block_group; @@ -330,28 +328,15 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl, btrfs_discard_schedule_work(discard_ctl, false); } -/** - * btrfs_discard_schedule_work - responsible for scheduling the discard work - * @discard_ctl: discard control - * @override: override the current timer - * - * Discards are issued by a delayed workqueue item. @override is used to - * update the current delay as the baseline delay interval is reevaluated on - * transaction commit. This is also maxed with any other rate limit. 
- */ -void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, - bool override) +static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, + u64 now, bool override) { struct btrfs_block_group *block_group; - const u64 now = ktime_get_ns(); - - spin_lock(&discard_ctl->lock); if (!btrfs_run_discard_work(discard_ctl)) - goto out; - + return; if (!override && delayed_work_pending(&discard_ctl->work)) - goto out; + return; block_group = find_next_block_group(discard_ctl, now); if (block_group) { @@ -384,7 +369,24 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, mod_delayed_work(discard_ctl->discard_workers, &discard_ctl->work, delay); } -out: +} + +/* + * btrfs_discard_schedule_work - responsible for scheduling the discard work + * @discard_ctl: discard control + * @override: override the current timer + * + * Discards are issued by a delayed workqueue item. @override is used to + * update the current delay as the baseline delay interval is reevaluated on + * transaction commit. This is also maxed with any other rate limit. 
+ */ +void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, + bool override) +{ + const u64 now = ktime_get_ns(); + + spin_lock(&discard_ctl->lock); + __btrfs_discard_schedule_work(discard_ctl, now, override); spin_unlock(&discard_ctl->lock); } @@ -429,13 +431,18 @@ static void btrfs_discard_workfn(struct work_struct *work) int discard_index = 0; u64 trimmed = 0; u64 minlen = 0; + u64 now = ktime_get_ns(); discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work); block_group = peek_discard_list(discard_ctl, &discard_state, - &discard_index); + &discard_index, now); if (!block_group || !btrfs_run_discard_work(discard_ctl)) return; + if (now < block_group->discard_eligible_time) { + btrfs_discard_schedule_work(discard_ctl, false); + return; + } /* Perform discarding */ minlen = discard_minlen[discard_index]; @@ -484,9 +491,8 @@ static void btrfs_discard_workfn(struct work_struct *work) spin_lock(&discard_ctl->lock); discard_ctl->block_group = NULL; + __btrfs_discard_schedule_work(discard_ctl, now, false); spin_unlock(&discard_ctl->lock); - - btrfs_discard_schedule_work(discard_ctl, false); } /** diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index af97ddcc6b3e87..56f3b9acd21543 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1482,7 +1482,7 @@ void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info) root = list_first_entry(&fs_info->allocated_roots, struct btrfs_root, leak_list); btrfs_err(fs_info, "leaked root %s refcount %d", - btrfs_root_name(root->root_key.objectid, buf), + btrfs_root_name(&root->root_key, buf), refcount_read(&root->refs)); while (refcount_read(&root->refs) > 1) btrfs_put_root(root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5fd60b13f4f832..8fba1c219b190b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2730,31 +2730,6 @@ btrfs_inc_block_group_reservations(struct btrfs_block_group *bg) atomic_inc(&bg->reservations); } -void 
btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info) -{ - struct btrfs_caching_control *next; - struct btrfs_caching_control *caching_ctl; - struct btrfs_block_group *cache; - - down_write(&fs_info->commit_root_sem); - - list_for_each_entry_safe(caching_ctl, next, - &fs_info->caching_block_groups, list) { - cache = caching_ctl->block_group; - if (btrfs_block_group_done(cache)) { - cache->last_byte_to_unpin = (u64)-1; - list_del_init(&caching_ctl->list); - btrfs_put_caching_control(caching_ctl); - } else { - cache->last_byte_to_unpin = caching_ctl->progress; - } - } - - up_write(&fs_info->commit_root_sem); - - btrfs_update_global_block_rsv(fs_info); -} - /* * Returns the free cluster for the given space info and sets empty_cluster to * what it should be based on the mount options. @@ -2816,10 +2791,10 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, len = cache->start + cache->length - start; len = min(len, end + 1 - start); - if (start < cache->last_byte_to_unpin) { - len = min(len, cache->last_byte_to_unpin - start); - if (return_free_space) - btrfs_add_free_space(cache, start, len); + if (start < cache->last_byte_to_unpin && return_free_space) { + u64 add_len = min(len, cache->last_byte_to_unpin - start); + + btrfs_add_free_space(cache, start, add_len); } start += len; @@ -5596,7 +5571,15 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) goto out_free; } - trans = btrfs_start_transaction(tree_root, 0); + /* + * Use join to avoid potential EINTR from transaction + * start. See wait_reserve_ticket and the whole + * reservation callchain. 
+ */ + if (for_reloc) + trans = btrfs_join_transaction(tree_root); + else + trans = btrfs_start_transaction(tree_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_free; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 60f5f68d892dff..30cf917a58e92c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -676,9 +676,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc) static void extent_io_tree_panic(struct extent_io_tree *tree, int err) { - struct inode *inode = tree->private_data; - - btrfs_panic(btrfs_sb(inode->i_sb), err, + btrfs_panic(tree->fs_info, err, "locking error: extent tree was modified by another thread while locked"); } diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 6b9faf3b0e9676..6cf2f7bb30c275 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1152,6 +1152,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) return PTR_ERR(trans); set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); free_space_root = btrfs_create_tree(trans, BTRFS_FREE_SPACE_TREE_OBJECTID); if (IS_ERR(free_space_root)) { @@ -1173,11 +1174,18 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + ret = btrfs_commit_transaction(trans); - return btrfs_commit_transaction(trans); + /* + * Now that we've committed the transaction any reading of our commit + * root will be safe, so we can cache from the free space tree now. 
+ */ + clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); + return ret; abort: clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7e8d8169779d24..acc47e2ffb46b4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9389,7 +9389,9 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot) +static int start_delalloc_inodes(struct btrfs_root *root, + struct writeback_control *wbc, bool snapshot, + bool in_reclaim_context) { struct btrfs_inode *binode; struct inode *inode; @@ -9397,6 +9399,7 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot struct list_head works; struct list_head splice; int ret = 0; + bool full_flush = wbc->nr_to_write == LONG_MAX; INIT_LIST_HEAD(&works); INIT_LIST_HEAD(&splice); @@ -9410,6 +9413,11 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot list_move_tail(&binode->delalloc_inodes, &root->delalloc_inodes); + + if (in_reclaim_context && + test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags)) + continue; + inode = igrab(&binode->vfs_inode); if (!inode) { cond_resched_lock(&root->delalloc_lock); @@ -9420,18 +9428,24 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot if (snapshot) set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &binode->runtime_flags); - work = btrfs_alloc_delalloc_work(inode); - if (!work) { - iput(inode); - ret = -ENOMEM; - goto out; - } - list_add_tail(&work->list, &works); - btrfs_queue_work(root->fs_info->flush_workers, - &work->work); - if (*nr != U64_MAX) { - 
(*nr)--; - if (*nr == 0) + if (full_flush) { + work = btrfs_alloc_delalloc_work(inode); + if (!work) { + iput(inode); + ret = -ENOMEM; + goto out; + } + list_add_tail(&work->list, &works); + btrfs_queue_work(root->fs_info->flush_workers, + &work->work); + } else { + ret = sync_inode(inode, wbc); + if (!ret && + test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + ret = sync_inode(inode, wbc); + btrfs_add_delayed_iput(inode); + if (ret || wbc->nr_to_write <= 0) goto out; } cond_resched(); @@ -9457,17 +9471,29 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot int btrfs_start_delalloc_snapshot(struct btrfs_root *root) { + struct writeback_control wbc = { + .nr_to_write = LONG_MAX, + .sync_mode = WB_SYNC_NONE, + .range_start = 0, + .range_end = LLONG_MAX, + }; struct btrfs_fs_info *fs_info = root->fs_info; - u64 nr = U64_MAX; if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) return -EROFS; - return start_delalloc_inodes(root, &nr, true); + return start_delalloc_inodes(root, &wbc, true, false); } -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr) +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, + bool in_reclaim_context) { + struct writeback_control wbc = { + .nr_to_write = (nr == U64_MAX) ? LONG_MAX : (unsigned long)nr, + .sync_mode = WB_SYNC_NONE, + .range_start = 0, + .range_end = LLONG_MAX, + }; struct btrfs_root *root; struct list_head splice; int ret; @@ -9481,6 +9507,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr) spin_lock(&fs_info->delalloc_root_lock); list_splice_init(&fs_info->delalloc_roots, &splice); while (!list_empty(&splice) && nr) { + /* + * Reset nr_to_write here so we know that we're doing a full + * flush. 
+ */ + if (nr == U64_MAX) + wbc.nr_to_write = LONG_MAX; + root = list_first_entry(&splice, struct btrfs_root, delalloc_root); root = btrfs_grab_root(root); @@ -9489,9 +9522,9 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr) &fs_info->delalloc_roots); spin_unlock(&fs_info->delalloc_root_lock); - ret = start_delalloc_inodes(root, &nr, false); + ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context); btrfs_put_root(root); - if (ret < 0) + if (ret < 0 || wbc.nr_to_write <= 0) goto out; spin_lock(&fs_info->delalloc_root_lock); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 69a384145dc6fc..bd46e107f955ec 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1275,6 +1275,7 @@ static int cluster_pages_for_defrag(struct inode *inode, u64 page_end; u64 page_cnt; u64 start = (u64)start_index << PAGE_SHIFT; + u64 search_start; int ret; int i; int i_done; @@ -1371,6 +1372,40 @@ static int cluster_pages_for_defrag(struct inode *inode, lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, &cached_state); + + /* + * When defragmenting we skip ranges that have holes or inline extents, + * (check should_defrag_range()), to avoid unnecessary IO and wasting + * space. At btrfs_defrag_file(), we check if a range should be defragged + * before locking the inode and then, if it should, we trigger a sync + * page cache readahead - we lock the inode only after that to avoid + * blocking for too long other tasks that possibly want to operate on + * other file ranges. But before we were able to get the inode lock, + * some other task may have punched a hole in the range, or we may have + * now an inline extent, in which case we should not defrag. So check + * for that here, where we have the inode and the range locked, and bail + * out if that happened. 
+ */ + search_start = page_start; + while (search_start < page_end) { + struct extent_map *em; + + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start, + page_end - search_start); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out_unlock_range; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + free_extent_map(em); + /* Ok, 0 means we did not defrag anything */ + ret = 0; + goto out_unlock_range; + } + search_start = extent_map_end(em); + free_extent_map(em); + } + clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, &cached_state); @@ -1401,6 +1436,10 @@ static int cluster_pages_for_defrag(struct inode *inode, btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return i_done; + +out_unlock_range: + unlock_extent_cached(&BTRFS_I(inode)->io_tree, + page_start, page_end - 1, &cached_state); out: for (i = 0; i < i_done; i++) { unlock_page(pages[i]); @@ -4901,7 +4940,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_SYNC: { int ret; - ret = btrfs_start_delalloc_roots(fs_info, U64_MAX); + ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false); if (ret) return ret; ret = btrfs_sync_fs(inode->i_sb, 1); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 7695c4783d33b0..c62771f3af8c62 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -26,22 +26,22 @@ static const struct root_name_map root_map[] = { { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }, }; -const char *btrfs_root_name(u64 objectid, char *buf) +const char *btrfs_root_name(const struct btrfs_key *key, char *buf) { int i; - if (objectid == BTRFS_TREE_RELOC_OBJECTID) { + if (key->objectid == BTRFS_TREE_RELOC_OBJECTID) { snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, - "TREE_RELOC offset=%llu", objectid); + "TREE_RELOC offset=%llu", key->offset); return buf; } for (i = 0; i < ARRAY_SIZE(root_map); i++) { - if 
(root_map[i].id == objectid) + if (root_map[i].id == key->objectid) return root_map[i].name; } - snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, "%llu", objectid); + snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, "%llu", key->objectid); return buf; } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 78b99385a503fb..8c3e9319ec4efe 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -11,6 +11,6 @@ void btrfs_print_leaf(struct extent_buffer *l); void btrfs_print_tree(struct extent_buffer *c, bool follow); -const char *btrfs_root_name(u64 objectid, char *buf); +const char *btrfs_root_name(const struct btrfs_key *key, char *buf); #endif diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 87bd37b70738ec..d504a9a207515c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3224,6 +3224,12 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, return ret; } +static bool rescan_should_stop(struct btrfs_fs_info *fs_info) +{ + return btrfs_fs_closing(fs_info) || + test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); +} + static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) { struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, @@ -3232,6 +3238,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) struct btrfs_trans_handle *trans = NULL; int err = -ENOMEM; int ret = 0; + bool stopped = false; path = btrfs_alloc_path(); if (!path) @@ -3244,7 +3251,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) path->skip_locking = 1; err = 0; - while (!err && !btrfs_fs_closing(fs_info)) { + while (!err && !(stopped = rescan_should_stop(fs_info))) { trans = btrfs_start_transaction(fs_info->fs_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -3287,7 +3294,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) } mutex_lock(&fs_info->qgroup_rescan_lock); - if (!btrfs_fs_closing(fs_info)) + if (!stopped) fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; if (trans) { 
ret = update_qgroup_status_item(trans); @@ -3306,7 +3313,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) btrfs_end_transaction(trans); - if (btrfs_fs_closing(fs_info)) { + if (stopped) { btrfs_info(fs_info, "qgroup scan paused"); } else if (err >= 0) { btrfs_info(fs_info, "qgroup scan completed%s", @@ -3564,16 +3571,6 @@ static int try_flush_qgroup(struct btrfs_root *root) int ret; bool can_commit = true; - /* - * We don't want to run flush again and again, so if there is a running - * one, we won't try to start a new flush, but exit directly. - */ - if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { - wait_event(root->qgroup_flush_wait, - !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); - return 0; - } - /* * If current process holds a transaction, we shouldn't flush, as we * assume all space reservation happens before a transaction handle is @@ -3588,6 +3585,26 @@ static int try_flush_qgroup(struct btrfs_root *root) current->journal_info != BTRFS_SEND_TRANS_STUB) can_commit = false; + /* + * We don't want to run flush again and again, so if there is a running + * one, we won't try to start a new flush, but exit directly. + */ + if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { + /* + * We are already holding a transaction, thus we can block other + * threads from flushing. So exit right now. This increases + * the chance of EDQUOT for heavy load and near limit cases. + * But we can argue that if we're already near limit, EDQUOT is + * unavoidable anyway. 
+ */ + if (!can_commit) + return 0; + + wait_event(root->qgroup_flush_wait, + !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); + return 0; + } + ret = btrfs_start_delalloc_snapshot(root); if (ret < 0) goto out; diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index 99aa87c089121b..a646af95dd100b 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -89,6 +89,19 @@ static int copy_inline_to_page(struct btrfs_inode *inode, if (ret) goto out_unlock; + /* + * After dirtying the page our caller will need to start a transaction, + * and if we are low on metadata free space, that can cause flushing of + * delalloc for all inodes in order to get metadata space released. + * However we are holding the range locked for the whole duration of + * the clone/dedupe operation, so we may deadlock if that happens and no + * other task releases enough space. So mark this inode as not being + * possible to flush to avoid such deadlock. We will clear that flag + * when we finish cloning all extents, since a transaction is started + * after finding each extent to clone. 
+ */ + set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags); + if (comp_type == BTRFS_COMPRESS_NONE) { char *map; @@ -547,6 +560,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode, out: btrfs_free_path(path); kvfree(buf); + clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags); + return ret; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 9ba92d86da0bf0..108e93ff6cb6f8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3027,11 +3027,16 @@ static int delete_v1_space_cache(struct extent_buffer *leaf, return 0; for (i = 0; i < btrfs_header_nritems(leaf); i++) { + u8 type; + btrfs_item_key_to_cpu(leaf, &key, i); if (key.type != BTRFS_EXTENT_DATA_KEY) continue; ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_REG && + type = btrfs_file_extent_type(leaf, ei); + + if ((type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) && btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) { found = true; space_cache_ino = key.objectid; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 340c76a12ce100..9e5809118c34d2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -236,6 +236,7 @@ struct waiting_dir_move { * after this directory is moved, we can try to rmdir the ino rmdir_ino. 
*/ u64 rmdir_ino; + u64 rmdir_gen; bool orphanized; }; @@ -316,7 +317,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); static struct waiting_dir_move * get_waiting_dir_move(struct send_ctx *sctx, u64 ino); -static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); +static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen); static int need_send_hole(struct send_ctx *sctx) { @@ -2299,7 +2300,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, fs_path_reset(name); - if (is_waiting_for_rm(sctx, ino)) { + if (is_waiting_for_rm(sctx, ino, gen)) { ret = gen_unique_name(sctx, ino, gen, name); if (ret < 0) goto out; @@ -2858,8 +2859,8 @@ static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, return ret; } -static struct orphan_dir_info * -add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) +static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx, + u64 dir_ino, u64 dir_gen) { struct rb_node **p = &sctx->orphan_dirs.rb_node; struct rb_node *parent = NULL; @@ -2868,20 +2869,23 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) while (*p) { parent = *p; entry = rb_entry(parent, struct orphan_dir_info, node); - if (dir_ino < entry->ino) { + if (dir_ino < entry->ino) p = &(*p)->rb_left; - } else if (dir_ino > entry->ino) { + else if (dir_ino > entry->ino) p = &(*p)->rb_right; - } else { + else if (dir_gen < entry->gen) + p = &(*p)->rb_left; + else if (dir_gen > entry->gen) + p = &(*p)->rb_right; + else return entry; - } } odi = kmalloc(sizeof(*odi), GFP_KERNEL); if (!odi) return ERR_PTR(-ENOMEM); odi->ino = dir_ino; - odi->gen = 0; + odi->gen = dir_gen; odi->last_dir_index_offset = 0; rb_link_node(&odi->node, parent, p); @@ -2889,8 +2893,8 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) return odi; } -static struct orphan_dir_info * -get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) +static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx, + 
u64 dir_ino, u64 gen) { struct rb_node *n = sctx->orphan_dirs.rb_node; struct orphan_dir_info *entry; @@ -2901,15 +2905,19 @@ get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) n = n->rb_left; else if (dir_ino > entry->ino) n = n->rb_right; + else if (gen < entry->gen) + n = n->rb_left; + else if (gen > entry->gen) + n = n->rb_right; else return entry; } return NULL; } -static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) +static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen) { - struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); + struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen); return odi != NULL; } @@ -2954,7 +2962,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, key.type = BTRFS_DIR_INDEX_KEY; key.offset = 0; - odi = get_orphan_dir_info(sctx, dir); + odi = get_orphan_dir_info(sctx, dir, dir_gen); if (odi) key.offset = odi->last_dir_index_offset; @@ -2985,7 +2993,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, dm = get_waiting_dir_move(sctx, loc.objectid); if (dm) { - odi = add_orphan_dir_info(sctx, dir); + odi = add_orphan_dir_info(sctx, dir, dir_gen); if (IS_ERR(odi)) { ret = PTR_ERR(odi); goto out; @@ -2993,12 +3001,13 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, odi->gen = dir_gen; odi->last_dir_index_offset = found_key.offset; dm->rmdir_ino = dir; + dm->rmdir_gen = dir_gen; ret = 0; goto out; } if (loc.objectid > send_progress) { - odi = add_orphan_dir_info(sctx, dir); + odi = add_orphan_dir_info(sctx, dir, dir_gen); if (IS_ERR(odi)) { ret = PTR_ERR(odi); goto out; @@ -3038,6 +3047,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) return -ENOMEM; dm->ino = ino; dm->rmdir_ino = 0; + dm->rmdir_gen = 0; dm->orphanized = orphanized; while (*p) { @@ -3183,7 +3193,7 @@ static int path_loop(struct send_ctx *sctx, struct fs_path *name, while (ino != BTRFS_FIRST_FREE_OBJECTID) { 
fs_path_reset(name); - if (is_waiting_for_rm(sctx, ino)) + if (is_waiting_for_rm(sctx, ino, gen)) break; if (is_waiting_for_move(sctx, ino)) { if (*ancestor_ino == 0) @@ -3223,6 +3233,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) u64 parent_ino, parent_gen; struct waiting_dir_move *dm = NULL; u64 rmdir_ino = 0; + u64 rmdir_gen; u64 ancestor; bool is_orphan; int ret; @@ -3237,6 +3248,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) dm = get_waiting_dir_move(sctx, pm->ino); ASSERT(dm); rmdir_ino = dm->rmdir_ino; + rmdir_gen = dm->rmdir_gen; is_orphan = dm->orphanized; free_waiting_dir_move(sctx, dm); @@ -3273,6 +3285,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) dm = get_waiting_dir_move(sctx, pm->ino); ASSERT(dm); dm->rmdir_ino = rmdir_ino; + dm->rmdir_gen = rmdir_gen; } goto out; } @@ -3291,7 +3304,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) struct orphan_dir_info *odi; u64 gen; - odi = get_orphan_dir_info(sctx, rmdir_ino); + odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen); if (!odi) { /* already deleted */ goto finish; @@ -5499,6 +5512,21 @@ static int clone_range(struct send_ctx *sctx, break; offset += clone_len; clone_root->offset += clone_len; + + /* + * If we are cloning from the file we are currently processing, + * and using the send root as the clone root, we must stop once + * the current clone offset reaches the current eof of the file + * at the receiver, otherwise we would issue an invalid clone + * operation (source range going beyond eof) and cause the + * receiver to fail. So if we reach the current eof, bail out + * and fallback to a regular write. 
+ */ + if (clone_root->root == sctx->send_root && + clone_root->ino == sctx->cur_ino && + clone_root->offset >= sctx->cur_inode_next_write_offset) + break; + data_offset += clone_len; next: path->slots[0]++; diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 64099565ab8f5d..e8347461c8dddd 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -532,7 +532,9 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, loops = 0; while ((delalloc_bytes || dio_bytes) && loops < 3) { - btrfs_start_delalloc_roots(fs_info, items); + u64 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; + + btrfs_start_delalloc_roots(fs_info, nr_pages, true); loops++; if (wait_ordered && !trans) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8840a4fa81eb78..2663485c17cb88 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1895,6 +1895,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_scrub_cancel(fs_info); btrfs_pause_balance(fs_info); + /* + * Pause the qgroup rescan worker if it is running. We don't want + * it to be still running after we are in RO mode, as after that, + * by the time we unmount, it might have left a transaction open, + * so we would leak the transaction and/or crash. 
+ */ + btrfs_qgroup_wait_for_completion(fs_info, false); + ret = btrfs_commit_super(fs_info); if (ret) goto restore; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 52ada47aff50d7..96dbfc011f45dd 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -155,6 +155,7 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans) struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root, *tmp; + struct btrfs_caching_control *caching_ctl, *next; down_write(&fs_info->commit_root_sem); list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits, @@ -180,6 +181,45 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans) spin_lock(&cur_trans->dropped_roots_lock); } spin_unlock(&cur_trans->dropped_roots_lock); + + /* + * We have to update the last_byte_to_unpin under the commit_root_sem, + * at the same time we swap out the commit roots. + * + * This is because we must have a real view of the last spot the caching + * kthreads were while caching. Consider the following views of the + * extent tree for a block group + * + * commit root + * +----+----+----+----+----+----+----+ + * |\\\\| |\\\\|\\\\| |\\\\|\\\\| + * +----+----+----+----+----+----+----+ + * 0 1 2 3 4 5 6 7 + * + * new commit root + * +----+----+----+----+----+----+----+ + * | | | |\\\\| | |\\\\| + * +----+----+----+----+----+----+----+ + * 0 1 2 3 4 5 6 7 + * + * If the cache_ctl->progress was at 3, then we are only allowed to + * unpin [0,1) and [2,3], because the caching thread has already + * processed those extents. We are not allowed to unpin [5,6), because + * the caching thread will re-start it's search from 3, and thus find + * the hole from [4,6) to add to the free space cache. 
+ */ + list_for_each_entry_safe(caching_ctl, next, + &fs_info->caching_block_groups, list) { + struct btrfs_block_group *cache = caching_ctl->block_group; + + if (btrfs_block_group_done(cache)) { + cache->last_byte_to_unpin = (u64)-1; + list_del_init(&caching_ctl->list); + btrfs_put_caching_control(caching_ctl); + } else { + cache->last_byte_to_unpin = caching_ctl->progress; + } + } up_write(&fs_info->commit_root_sem); } @@ -2293,8 +2333,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) goto unlock_tree_log; } - btrfs_prepare_extent_commit(fs_info); - cur_trans = fs_info->running_transaction; btrfs_set_root_node(&fs_info->tree_root->root_item, diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ea2bb4cb58909b..40845428b739c7 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -754,6 +754,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, { struct btrfs_fs_info *fs_info = leaf->fs_info; u64 length; + u64 chunk_end; u64 stripe_len; u16 num_stripes; u16 sub_stripes; @@ -808,6 +809,12 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, "invalid chunk length, have %llu", length); return -EUCLEAN; } + if (unlikely(check_add_overflow(logical, length, &chunk_end))) { + chunk_err(leaf, chunk, logical, +"invalid chunk logical start and length, have logical start %llu length %llu", + logical, length); + return -EUCLEAN; + } if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { chunk_err(leaf, chunk, logical, "invalid chunk stripe length: %llu", diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 56cbc1706b6f7e..5b11bb97706641 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4571,6 +4571,10 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, const u64 ino = btrfs_ino(inode); int ins_nr = 0; int start_slot = 0; + bool found_xattrs = false; + + if (test_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags)) + return 0; key.objectid = ino; key.type = 
BTRFS_XATTR_ITEM_KEY; @@ -4609,6 +4613,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, start_slot = slot; ins_nr++; path->slots[0]++; + found_xattrs = true; cond_resched(); } if (ins_nr > 0) { @@ -4618,6 +4623,9 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, return ret; } + if (!found_xattrs) + set_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags); + return 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 78637665166e05..f9ae3850526c6d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -431,7 +431,7 @@ static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info) atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); - btrfs_device_data_ordered_init(dev, fs_info); + btrfs_device_data_ordered_init(dev); INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); extent_io_tree_init(fs_info, &dev->alloc_state, @@ -4288,6 +4288,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) btrfs_warn(fs_info, "balance: cannot set exclusive op status, resume manually"); + btrfs_release_path(path); + mutex_lock(&fs_info->balance_mutex); BUG_ON(fs_info->balance_ctl); spin_lock(&fs_info->balance_lock); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 232f02bd214fc4..f2177263748e8a 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -39,10 +39,10 @@ struct btrfs_io_geometry { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) #include #define __BTRFS_NEED_DEVICE_DATA_ORDERED -#define btrfs_device_data_ordered_init(device, info) \ - seqcount_mutex_init(&device->data_seqcount, &info->chunk_mutex) +#define btrfs_device_data_ordered_init(device) \ + seqcount_init(&device->data_seqcount) #else -#define btrfs_device_data_ordered_init(device, info) do { } while (0) +#define btrfs_device_data_ordered_init(device) do { } while (0) #endif #define BTRFS_DEV_STATE_WRITEABLE (0) @@ -72,8 +72,7 
@@ struct btrfs_device { blk_status_t last_flush_error; #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED - /* A seqcount_t with associated chunk_mutex (for lockdep) */ - seqcount_mutex_t data_seqcount; + seqcount_t data_seqcount; #endif /* the internal btrfs device id */ @@ -164,9 +163,11 @@ btrfs_device_get_##name(const struct btrfs_device *dev) \ static inline void \ btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ { \ + preempt_disable(); \ write_seqcount_begin(&dev->data_seqcount); \ dev->name = size; \ write_seqcount_end(&dev->data_seqcount); \ + preempt_enable(); \ } #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) #define BTRFS_DEVICE_GETSET_FUNCS(name) \ diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 95d9aebff2c4b0..e51774201d53b8 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -213,9 +213,11 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, } out: btrfs_free_path(path); - if (!ret) + if (!ret) { set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); + clear_bit(BTRFS_INODE_NO_XATTRS, &BTRFS_I(inode)->runtime_flags); + } return ret; } diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 8bda092e60c5a0..e027c718ca01ad 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -413,7 +413,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, inode = d_backing_inode(object->backer); ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; @@ -713,7 +712,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, inode = d_backing_inode(object->backer); ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ded4229c314a02..2b200b5a44c3a4 100644 --- a/fs/ceph/caps.c +++ 
b/fs/ceph/caps.c @@ -1140,12 +1140,19 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) { struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; - struct ceph_mds_client *mdsc = - ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; + struct ceph_mds_client *mdsc; int removed = 0; + /* 'ci' being NULL means the remove have already occurred */ + if (!ci) { + dout("%s: cap inode is NULL\n", __func__); + return; + } + dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); + mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc; + /* remove from inode's cap rbtree, and clear auth cap */ rb_erase(&cap->ci_node, &ci->i_caps); if (ci->i_auth_cap == cap) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 526faf4778ce43..2462a9a84b9567 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1335,6 +1335,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) in, ceph_vinop(in)); if (in->i_state & I_NEW) discard_new_inode(in); + else + iput(in); goto done; } req->r_target_inode = in; diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 6ee849698962d1..7b6db272fd0b81 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1317,7 +1317,8 @@ void dfs_cache_del_vol(const char *fullpath) vi = find_vol(fullpath); spin_unlock(&vol_list_lock); - kref_put(&vi->refcnt, vol_release); + if (!IS_ERR(vi)) + kref_put(&vi->refcnt, vol_release); } /** diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index d88e2683626e7e..2da6b41cb5526a 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -94,6 +94,8 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = { /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24) }; +#define SMB311_NEGPROT_BASE_SIZE (sizeof(struct smb2_sync_hdr) + sizeof(struct smb2_negotiate_rsp)) + static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len, __u32 non_ctxlen) { @@ -109,11 +111,17 @@ static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len, /* Make 
sure that negotiate contexts start after gss security blob */ nc_offset = le32_to_cpu(pneg_rsp->NegotiateContextOffset); - if (nc_offset < non_ctxlen) { - pr_warn_once("Invalid negotiate context offset\n"); + if (nc_offset + 1 < non_ctxlen) { + pr_warn_once("Invalid negotiate context offset %d\n", nc_offset); return 0; - } - size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen; + } else if (nc_offset + 1 == non_ctxlen) { + cifs_dbg(FYI, "no SPNEGO security blob in negprot rsp\n"); + size_of_pad_before_neg_ctxts = 0; + } else if (non_ctxlen == SMB311_NEGPROT_BASE_SIZE) + /* has padding, but no SPNEGO blob */ + size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen + 1; + else + size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen; /* Verify that at least minimal negotiate contexts fit within frame */ if (len < nc_offset + (neg_count * sizeof(struct smb2_neg_context))) { diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 3d914d7d0d1100..22f1d8dc12b009 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -477,7 +477,8 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto out; } - if (bytes_left || p->Next) + /* Azure rounds the buffer size up 8, to a 16 byte boundary */ + if ((bytes_left > 8) || p->Next) cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index acb72705062dd0..c6f8bc6729aa10 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -427,8 +427,8 @@ build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt) pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES; pneg_ctxt->DataLength = cpu_to_le16(38); pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1); - pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE); - get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE); + pneg_ctxt->SaltLength = cpu_to_le16(SMB311_LINUX_CLIENT_SALT_SIZE); + get_random_bytes(pneg_ctxt->Salt, SMB311_LINUX_CLIENT_SALT_SIZE); pneg_ctxt->HashAlgorithms = 
SMB2_PREAUTH_INTEGRITY_SHA512; } @@ -566,6 +566,9 @@ static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt) if (len < MIN_PREAUTH_CTXT_DATA_LEN) { pr_warn_once("server sent bad preauth context\n"); return; + } else if (len < MIN_PREAUTH_CTXT_DATA_LEN + le16_to_cpu(ctxt->SaltLength)) { + pr_warn_once("server sent invalid SaltLength\n"); + return; } if (le16_to_cpu(ctxt->HashAlgorithmCount) != 1) pr_warn_once("Invalid SMB3 hash algorithm count\n"); @@ -3245,7 +3248,7 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, free_rsp_buf(resp_buftype, rsp); /* retry close in a worker thread if this one is interrupted */ - if (rc == -EINTR) { + if (is_interrupt_error(rc)) { int tmp_rc; tmp_rc = smb2_handle_cancelled_close(tcon, persistent_fid, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index fa57b03ca98c49..204a622b89ed35 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -333,12 +333,20 @@ struct smb2_neg_context { /* Followed by array of data */ } __packed; -#define SMB311_SALT_SIZE 32 +#define SMB311_LINUX_CLIENT_SALT_SIZE 32 /* Hash Algorithm Types */ #define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001) #define SMB2_PREAUTH_HASH_SIZE 64 -#define MIN_PREAUTH_CTXT_DATA_LEN (SMB311_SALT_SIZE + 6) +/* + * SaltLength that the server send can be zero, so the only three required + * fields (all __le16) end up six bytes total, so the minimum context data len + * in the response is six bytes which accounts for + * + * HashAlgorithmCount, SaltLength, and 1 HashAlgorithm. 
+ */ +#define MIN_PREAUTH_CTXT_DATA_LEN 6 + struct smb2_preauth_neg_context { __le16 ContextType; /* 1 */ __le16 DataLength; @@ -346,7 +354,7 @@ struct smb2_preauth_neg_context { __le16 HashAlgorithmCount; /* 1 */ __le16 SaltLength; __le16 HashAlgorithms; /* HashAlgorithms[0] since only one defined */ - __u8 Salt[SMB311_SALT_SIZE]; + __u8 Salt[SMB311_LINUX_CLIENT_SALT_SIZE]; } __packed; /* Encryption Algorithms Ciphers */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 36b2ece434037a..b1c2f416b9bd9b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -338,7 +338,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (ssocket == NULL) return -EAGAIN; - if (signal_pending(current)) { + if (fatal_signal_pending(current)) { cifs_dbg(FYI, "signal pending before send request\n"); return -ERESTARTSYS; } @@ -429,7 +429,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (signal_pending(current) && (total_len != send_length)) { cifs_dbg(FYI, "signal is pending after attempt to send\n"); - rc = -EINTR; + rc = -ERESTARTSYS; } /* uncork it */ diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 4f5806a3b73d78..322ecae9a75809 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -25,6 +25,9 @@ #define FSCRYPT_CONTEXT_V1 1 #define FSCRYPT_CONTEXT_V2 2 +/* Keep this in sync with include/uapi/linux/fscrypt.h */ +#define FSCRYPT_MODE_MAX FSCRYPT_MODE_ADIANTUM + struct fscrypt_context_v1 { u8 version; /* FSCRYPT_CONTEXT_V1 */ u8 contents_encryption_mode; @@ -491,9 +494,9 @@ struct fscrypt_master_key { * Per-mode encryption keys for the various types of encryption policies * that use them. Allocated and derived on-demand. 
*/ - struct fscrypt_prepared_key mk_direct_keys[__FSCRYPT_MODE_MAX + 1]; - struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[__FSCRYPT_MODE_MAX + 1]; - struct fscrypt_prepared_key mk_iv_ino_lblk_32_keys[__FSCRYPT_MODE_MAX + 1]; + struct fscrypt_prepared_key mk_direct_keys[FSCRYPT_MODE_MAX + 1]; + struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[FSCRYPT_MODE_MAX + 1]; + struct fscrypt_prepared_key mk_iv_ino_lblk_32_keys[FSCRYPT_MODE_MAX + 1]; /* Hash key for inode numbers. Initialized only when needed. */ siphash_key_t mk_ino_hash_key; diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 20b0df47fe6ab5..061418be4b0863 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -61,7 +61,7 @@ int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, return err; /* ... in case we looked up no-key name before key was added */ - if (dentry->d_flags & DCACHE_NOKEY_NAME) + if (fscrypt_is_nokey_name(dentry)) return -ENOKEY; if (!fscrypt_has_permitted_context(dir, inode)) @@ -86,7 +86,8 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, return err; /* ... 
in case we looked up no-key name(s) before key was added */ - if ((old_dentry->d_flags | new_dentry->d_flags) & DCACHE_NOKEY_NAME) + if (fscrypt_is_nokey_name(old_dentry) || + fscrypt_is_nokey_name(new_dentry)) return -ENOKEY; if (old_dir != new_dir) { diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 53cc552a7b8fd3..d7ec52cb3d9af8 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -44,7 +44,7 @@ static void free_master_key(struct fscrypt_master_key *mk) wipe_master_key_secret(&mk->mk_secret); - for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) { + for (i = 0; i <= FSCRYPT_MODE_MAX; i++) { fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]); fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]); fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_32_keys[i]); diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index d595abb8ef90da..31fb08d94f8741 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -56,6 +56,8 @@ static struct fscrypt_mode * select_encryption_mode(const union fscrypt_policy *policy, const struct inode *inode) { + BUILD_BUG_ON(ARRAY_SIZE(fscrypt_modes) != FSCRYPT_MODE_MAX + 1); + if (S_ISREG(inode->i_mode)) return &fscrypt_modes[fscrypt_policy_contents_mode(policy)]; @@ -168,7 +170,7 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci, unsigned int hkdf_infolen = 0; int err; - if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX)) + if (WARN_ON(mode_num > FSCRYPT_MODE_MAX)) return -EINVAL; prep_key = &keys[mode_num]; diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 4441d9944b9eff..faa0f21daa6842 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -175,7 +175,10 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, return false; } - if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { + if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | + FSCRYPT_POLICY_FLAG_DIRECT_KEY | + FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 | + FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) { fscrypt_warn(inode, 
"Unsupported encryption flags (0x%02x)", policy->flags); return false; diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 347be146884c36..ea4f693bee2247 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -312,27 +312,12 @@ static void erofs_raw_access_readahead(struct readahead_control *rac) submit_bio(bio); } -static int erofs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) -{ - struct erofs_map_blocks map = { - .m_la = iblock << 9, - }; - int err; - - err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); - if (err) - return err; - - if (map.m_flags & EROFS_MAP_MAPPED) - bh->b_blocknr = erofs_blknr(map.m_pa); - - return err; -} - static sector_t erofs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; + struct erofs_map_blocks map = { + .m_la = blknr_to_addr(block), + }; if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) { erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE; @@ -341,7 +326,10 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block) return 0; } - return generic_block_bmap(mapping, block, erofs_get_block); + if (!erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW)) + return erofs_blknr(map.m_pa); + + return 0; } /* for uncompressed (aligned) files and raw access for other files */ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4df61129566d40..117b1c395ae4ad 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1902,23 +1902,30 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } write_unlock_irq(&ep->lock); - if (eavail || res) - break; + if (!eavail && !res) + timed_out = !schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS); - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) { - timed_out = 1; - break; - } - - /* We were woken up, thus go and try to harvest some events */ + /* + * We were woken up, thus go and try to harvest some events. 
+ * If timed out and still on the wait queue, recheck eavail + * carefully under lock, below. + */ eavail = 1; - } while (0); __set_current_state(TASK_RUNNING); if (!list_empty_careful(&wait.entry)) { write_lock_irq(&ep->lock); + /* + * If the thread timed out and is not on the wait queue, it + * means that the thread was woken up after its timeout expired + * before it could reacquire the lock. Thus, when wait.entry is + * empty, it needs to harvest events. + */ + if (timed_out) + eavail = list_empty(&wait.entry); __remove_wait_queue(&ep->wq, &wait); write_unlock_irq(&ep->lock); } diff --git a/fs/exec.c b/fs/exec.c index 547a2390baf54f..ca89e0e3ef10f1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -965,8 +965,8 @@ EXPORT_SYMBOL(read_code); /* * Maps the mm_struct mm into the current task struct. - * On success, this function returns with the mutex - * exec_update_mutex locked. + * On success, this function returns with exec_update_lock + * held for writing. */ static int exec_mmap(struct mm_struct *mm) { @@ -981,7 +981,7 @@ static int exec_mmap(struct mm_struct *mm) if (old_mm) sync_mm_rss(old_mm); - ret = mutex_lock_killable(&tsk->signal->exec_update_mutex); + ret = down_write_killable(&tsk->signal->exec_update_lock); if (ret) return ret; @@ -995,7 +995,7 @@ static int exec_mmap(struct mm_struct *mm) mmap_read_lock(old_mm); if (unlikely(old_mm->core_state)) { mmap_read_unlock(old_mm); - mutex_unlock(&tsk->signal->exec_update_mutex); + up_write(&tsk->signal->exec_update_lock); return -EINTR; } } @@ -1382,7 +1382,7 @@ int begin_new_exec(struct linux_binprm * bprm) return 0; out_unlock: - mutex_unlock(&me->signal->exec_update_mutex); + up_write(&me->signal->exec_update_lock); out: return retval; } @@ -1423,7 +1423,7 @@ void setup_new_exec(struct linux_binprm * bprm) * some architectures like powerpc */ me->mm->task_size = TASK_SIZE; - mutex_unlock(&me->signal->exec_update_mutex); + up_write(&me->signal->exec_update_lock); mutex_unlock(&me->signal->cred_guard_mutex); } 
EXPORT_SYMBOL(setup_new_exec); diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 675d0e7058c5a0..314d5407a1be50 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -659,7 +659,7 @@ static int exfat_load_upcase_table(struct super_block *sb, unsigned char skip = false; unsigned short *upcase_table; - upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); + upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); if (!upcase_table) return -ENOMEM; @@ -715,7 +715,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb) unsigned short uni = 0, *upcase_table; unsigned int index = 0; - upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); + upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); if (!upcase_table) return -ENOMEM; @@ -803,5 +803,5 @@ int exfat_create_upcase_table(struct super_block *sb) void exfat_free_upcase_table(struct exfat_sb_info *sbi) { - kfree(sbi->vol_utbl); + kvfree(sbi->vol_utbl); } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 17d7096b3212dc..12eac88373032d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5815,8 +5815,8 @@ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start, int ret; path = ext4_find_extent(inode, start, NULL, 0); - if (!path) - return -EINVAL; + if (IS_ERR(path)) + return PTR_ERR(path); ex = path[path->p_depth].p_ext; if (!ex) { ret = -EFSCORRUPTED; diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index f2033e13a273c7..a1dd7ca962c3f5 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1207,7 +1207,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], &sbi->s_fc_dentry_q[FC_Q_MAIN]); list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], - &sbi->s_fc_q[FC_Q_STAGING]); + &sbi->s_fc_q[FC_Q_MAIN]); ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); @@ -1269,14 +1269,14 @@ static int 
ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl) entry.len = darg.dname_len; inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found", darg.ino); return 0; } old_parent = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(old_parent)) { + if (IS_ERR(old_parent)) { jbd_debug(1, "Dir with inode %d not found", darg.parent_ino); iput(inode); return 0; @@ -1361,7 +1361,7 @@ static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl) darg.parent_ino, darg.dname_len); inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return 0; } @@ -1417,10 +1417,11 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) trace_ext4_fc_replay(sb, tag, ino, 0, 0); inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); - if (!IS_ERR_OR_NULL(inode)) { + if (!IS_ERR(inode)) { ext4_ext_clear_bb(inode); iput(inode); } + inode = NULL; ext4_fc_record_modified_inode(sb, ino); @@ -1463,7 +1464,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) /* Given that we just wrote the inode on disk, this SHOULD succeed. */ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return -EFSCORRUPTED; } @@ -1515,7 +1516,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl) goto out; inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "inode %d not found.", darg.ino); inode = NULL; ret = -EINVAL; @@ -1528,7 +1529,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl) * dot and dot dot dirents are setup properly. 
*/ dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(dir)) { + if (IS_ERR(dir)) { jbd_debug(1, "Dir %d not found.", darg.ino); goto out; } @@ -1604,7 +1605,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb, inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino), EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return 0; } @@ -1728,7 +1729,7 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl) le32_to_cpu(lrange->fc_ino), cur, remaining); inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino)); return 0; } @@ -1809,7 +1810,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) for (i = 0; i < state->fc_modified_inodes_used; i++) { inode = ext4_iget(sb, state->fc_modified_inodes[i], EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found.", state->fc_modified_inodes[i]); continue; @@ -1826,7 +1827,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) if (ret > 0) { path = ext4_find_extent(inode, map.m_lblk, NULL, 0); - if (!IS_ERR_OR_NULL(path)) { + if (!IS_ERR(path)) { for (j = 0; j < path->p_depth; j++) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, 1); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3ed8c048fb12c5..b692355b8c770c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -809,7 +809,7 @@ static int ext4_sample_last_mounted(struct super_block *sb, err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto out_journal; - strlcpy(sbi->s_es->s_last_mounted, cp, + strncpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); ext4_handle_dirty_super(handle, sb); out_journal: diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0d8385aea8981c..0afab6d5c65bd0 100644 --- a/fs/ext4/inode.c +++ 
b/fs/ext4/inode.c @@ -175,6 +175,7 @@ void ext4_evict_inode(struct inode *inode) */ int extra_credits = 6; struct ext4_xattr_inode_array *ea_inode_array = NULL; + bool freeze_protected = false; trace_ext4_evict_inode(inode); @@ -232,9 +233,14 @@ void ext4_evict_inode(struct inode *inode) /* * Protect us against freezing - iput() caller didn't have to have any - * protection against it + * protection against it. When we are in a running transaction though, + * we are already protected against freezing and we cannot grab further + * protection due to lock ordering constraints. */ - sb_start_intwrite(inode->i_sb); + if (!ext4_journal_current_handle()) { + sb_start_intwrite(inode->i_sb); + freeze_protected = true; + } if (!IS_NOQUOTA(inode)) extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); @@ -253,7 +259,8 @@ void ext4_evict_inode(struct inode *inode) * cleaned up. */ ext4_orphan_del(NULL, inode); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); goto no_delete; } @@ -294,7 +301,8 @@ void ext4_evict_inode(struct inode *inode) stop_handle: ext4_journal_stop(handle); ext4_orphan_del(NULL, inode); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); ext4_xattr_inode_array_free(ea_inode_array); goto no_delete; } @@ -323,7 +331,8 @@ void ext4_evict_inode(struct inode *inode) else ext4_free_inode(handle, inode); ext4_journal_stop(handle); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); ext4_xattr_inode_array_free(ea_inode_array); return; no_delete: diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f0381876a7e5b0..106bf149e8ca8c 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1157,7 +1157,10 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto pwsalt_err_journal; + lock_buffer(sbi->s_sbh); generate_random_uuid(sbi->s_es->s_encrypt_pw_salt); + 
ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); pwsalt_err_journal: diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 24af9ed5c3e525..e67d5de6f28ca6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2395,9 +2395,9 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) nr = sbi->s_mb_prefetch; if (ext4_has_feature_flex_bg(sb)) { - nr = (group / sbi->s_mb_prefetch) * - sbi->s_mb_prefetch; - nr = nr + sbi->s_mb_prefetch - group; + nr = 1 << sbi->s_log_groups_per_flex; + nr -= group & (nr - 1); + nr = min(nr, sbi->s_mb_prefetch); } prefetch_grp = ext4_mb_prefetch(sb, group, nr, &prefetch_ios); @@ -2733,7 +2733,8 @@ static int ext4_mb_init_backend(struct super_block *sb) if (ext4_has_feature_flex_bg(sb)) { /* a single flex group is supposed to be read by a single IO */ - sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex; + sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex, + BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9)); sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ } else { sbi->s_mb_prefetch = 32; @@ -5126,6 +5127,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ext4_group_first_block_no(sb, group) + EXT4_C2B(sbi, cluster), "Block already on to-be-freed list"); + kmem_cache_free(ext4_free_data_cachep, new_entry); return 0; } } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 33509266f5a00a..df0886e08a7725 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2195,6 +2195,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if (!dentry->d_name.len) return -EINVAL; + if (fscrypt_is_nokey_name(dentry)) + return -ENOKEY; + #ifdef CONFIG_UNICODE if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) && sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) @@ -3599,9 +3602,6 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, return retval2; } } - 
brelse(ent->bh); - ent->bh = NULL; - return retval; } @@ -3800,6 +3800,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, } } + old_file_type = old.de->file_type; if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) ext4_handle_sync(handle); @@ -3827,7 +3828,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, force_reread = (new.dir->i_ino == old.dir->i_ino && ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); - old_file_type = old.de->file_type; if (whiteout) { /* * Do this before adding a new entry, so the old entry is sure @@ -3925,15 +3925,19 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, retval = 0; end_rename: - brelse(old.dir_bh); - brelse(old.bh); - brelse(new.bh); if (whiteout) { - if (retval) + if (retval) { + ext4_setent(handle, &old, + old.inode->i_ino, old_file_type); drop_nlink(whiteout); + } unlock_new_inode(whiteout); iput(whiteout); + } + brelse(old.dir_bh); + brelse(old.bh); + brelse(new.bh); if (handle) ext4_journal_stop(handle); return retval; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94472044f4c1d5..ea5aefa23a20a6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -666,19 +666,17 @@ static bool system_going_down(void) static void ext4_handle_error(struct super_block *sb) { + journal_t *journal = EXT4_SB(sb)->s_journal; + if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); - if (sb_rdonly(sb)) + if (sb_rdonly(sb) || test_opt(sb, ERRORS_CONT)) return; - if (!test_opt(sb, ERRORS_CONT)) { - journal_t *journal = EXT4_SB(sb)->s_journal; - - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - if (journal) - jbd2_journal_abort(journal, -EIO); - } + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); + if (journal) + jbd2_journal_abort(journal, -EIO); /* * We force ERRORS_RO behavior when system is rebooting. 
Otherwise we * could panic during 'reboot -f' as the underlying device got already @@ -4188,18 +4186,25 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - - if (blocksize == PAGE_SIZE) - set_opt(sb, DIOREAD_NOLOCK); - - if (blocksize < EXT4_MIN_BLOCK_SIZE || - blocksize > EXT4_MAX_BLOCK_SIZE) { + if (le32_to_cpu(es->s_log_block_size) > + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d (%d log_block_size)", - blocksize, le32_to_cpu(es->s_log_block_size)); + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); goto failed_mount; } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } + + blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + + if (blocksize == PAGE_SIZE) + set_opt(sb, DIOREAD_NOLOCK); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; @@ -4418,21 +4423,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) goto failed_mount; - if (le32_to_cpu(es->s_log_block_size) > - (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log block size: %u", - le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } - if (le32_to_cpu(es->s_log_cluster_size) > - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log cluster size: %u", - le32_to_cpu(es->s_log_cluster_size)); - goto failed_mount; - } - if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { ext4_msg(sb, KERN_ERR, "Number of reserved GDT blocks insanely large: %d", diff --git 
a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 023462e80e58d5..b39bf416d51148 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1600,7 +1600,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) goto out; } - if (NM_I(sbi)->dirty_nat_cnt == 0 && + if (NM_I(sbi)->nat_cnt[DIRTY_NAT] == 0 && SIT_I(sbi)->dirty_sentries == 0 && prefree_segments(sbi) == 0) { f2fs_flush_sit_entries(sbi, cpc); diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 14262e0f1cd60e..c5fee4d7ea72fe 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -798,8 +798,6 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) if (cops->destroy_decompress_ctx) cops->destroy_decompress_ctx(dic); out_free_dic: - if (verity) - atomic_set(&dic->pending_pages, dic->nr_cpages); if (!verity) f2fs_decompress_end_io(dic->rpages, dic->cluster_size, ret, false); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index be4da52604edc1..b29243ee1c3e54 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -202,7 +202,7 @@ static void f2fs_verify_bio(struct bio *bio) dic = (struct decompress_io_ctx *)page_private(page); if (dic) { - if (atomic_dec_return(&dic->pending_pages)) + if (atomic_dec_return(&dic->verity_pages)) continue; f2fs_verify_pages(dic->rpages, dic->cluster_size); @@ -1027,7 +1027,8 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, unsigned nr_pages, unsigned op_flag, - pgoff_t first_idx, bool for_write) + pgoff_t first_idx, bool for_write, + bool for_verity) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; @@ -1049,7 +1050,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, post_read_steps |= 1 << STEP_DECRYPT; if (f2fs_compressed_file(inode)) post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ; - if (f2fs_need_verity(inode, first_idx)) + if (for_verity && f2fs_need_verity(inode, 
first_idx)) post_read_steps |= 1 << STEP_VERITY; if (post_read_steps) { @@ -1079,7 +1080,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, struct bio *bio; bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, - page->index, for_write); + page->index, for_write, true); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -2133,7 +2134,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, is_readahead ? REQ_RAHEAD : 0, page->index, - false); + false, true); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -2180,6 +2181,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, const unsigned blkbits = inode->i_blkbits; const unsigned blocksize = 1 << blkbits; struct decompress_io_ctx *dic = NULL; + struct bio_post_read_ctx *ctx; + bool for_verity = false; int i; int ret = 0; @@ -2245,10 +2248,29 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, goto out_put_dnode; } + /* + * It's possible to enable fsverity on the fly when handling a cluster, + * which requires complicated error handling. Instead of adding more + * complexity, let's give a rule where end_io post-processes fsverity + * per cluster. In order to do that, we need to submit bio, if previous + * bio sets a different post-process policy. 
+ */ + if (fsverity_active(cc->inode)) { + atomic_set(&dic->verity_pages, cc->nr_cpages); + for_verity = true; + + if (bio) { + ctx = bio->bi_private; + if (!(ctx->enabled_steps & (1 << STEP_VERITY))) { + __submit_bio(sbi, bio, DATA); + bio = NULL; + } + } + } + for (i = 0; i < dic->nr_cpages; i++) { struct page *page = dic->cpages[i]; block_t blkaddr; - struct bio_post_read_ctx *ctx; blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i + 1); @@ -2264,17 +2286,31 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, is_readahead ? REQ_RAHEAD : 0, - page->index, for_write); + page->index, for_write, for_verity); if (IS_ERR(bio)) { + unsigned int remained = dic->nr_cpages - i; + bool release = false; + ret = PTR_ERR(bio); dic->failed = true; - if (!atomic_sub_return(dic->nr_cpages - i, - &dic->pending_pages)) { + + if (for_verity) { + if (!atomic_sub_return(remained, + &dic->verity_pages)) + release = true; + } else { + if (!atomic_sub_return(remained, + &dic->pending_pages)) + release = true; + } + + if (release) { f2fs_decompress_end_io(dic->rpages, - cc->cluster_size, true, - false); + cc->cluster_size, true, + false); f2fs_free_dic(dic); } + f2fs_put_dnode(&dn); *bio_ret = NULL; return ret; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a8357fd4f5fab9..197c914119da8e 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -145,8 +145,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->node_pages = NODE_MAPPING(sbi)->nrpages; if (sbi->meta_inode) si->meta_pages = META_MAPPING(sbi)->nrpages; - si->nats = NM_I(sbi)->nat_cnt; - si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; + si->nats = NM_I(sbi)->nat_cnt[TOTAL_NAT]; + si->dirty_nats = NM_I(sbi)->nat_cnt[DIRTY_NAT]; si->sits = MAIN_SEGS(sbi); si->dirty_sits = SIT_I(sbi)->dirty_sentries; si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID]; @@ -278,9 +278,10 @@ static void update_mem_info(struct f2fs_sb_info 
*sbi) si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] + NM_I(sbi)->nid_cnt[PREALLOC_NID]) * sizeof(struct free_nid); - si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); - si->cache_mem += NM_I(sbi)->dirty_nat_cnt * - sizeof(struct nat_entry_set); + si->cache_mem += NM_I(sbi)->nat_cnt[TOTAL_NAT] * + sizeof(struct nat_entry); + si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] * + sizeof(struct nat_entry_set); si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); for (i = 0; i < MAX_INO_ENTRY; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cb700d79729680..06e5a6053f3f9b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -894,6 +894,13 @@ enum nid_state { MAX_NID_STATE, }; +enum nat_state { + TOTAL_NAT, + DIRTY_NAT, + RECLAIMABLE_NAT, + MAX_NAT_STATE, +}; + struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ @@ -909,8 +916,7 @@ struct f2fs_nm_info { struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ spinlock_t nat_list_lock; /* protect clean nat entry list */ - unsigned int nat_cnt; /* the # of cached nat entries */ - unsigned int dirty_nat_cnt; /* total num of nat entries in set */ + unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ unsigned int nat_blocks; /* # of nat blocks */ /* free node ids management */ @@ -1404,6 +1410,7 @@ struct decompress_io_ctx { size_t rlen; /* valid data length in rbuf */ size_t clen; /* valid data length in cbuf */ atomic_t pending_pages; /* in-flight compressed page count */ + atomic_t verity_pages; /* in-flight page count for verity */ bool failed; /* indicate IO error during decompression */ void *private; /* payload buffer for specified decompression algorithm */ void *private2; /* extra payload buffer */ @@ -3251,6 +3258,8 @@ bool f2fs_empty_dir(struct inode *dir); static 
inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { + if (fscrypt_is_nokey_name(dentry)) + return -ENOKEY; return f2fs_do_add_link(d_inode(dentry->d_parent), &dentry->d_name, inode, inode->i_ino, inode->i_mode); } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ee861c6d9ff026..fe39e591e5b4c4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -412,9 +412,14 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; /* handle inline data case */ - if (f2fs_has_inline_data(inode) && whence == SEEK_HOLE) { - data_ofs = isize; - goto found; + if (f2fs_has_inline_data(inode)) { + if (whence == SEEK_HOLE) { + data_ofs = isize; + goto found; + } else if (whence == SEEK_DATA) { + data_ofs = offset; + goto found; + } } pgofs = (pgoff_t)(offset >> PAGE_SHIFT); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d5d8ce077f2959..e65d73293a3f63 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -62,8 +62,8 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct free_nid)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> - PAGE_SHIFT; + mem_size = (nm_i->nat_cnt[TOTAL_NAT] * + sizeof(struct nat_entry)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); if (excess_cached_nats(sbi)) res = false; @@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page) static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { - return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid)); + return f2fs_get_meta_page_retry(sbi, current_nat_addr(sbi, nid)); } static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) @@ -177,7 +177,8 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, list_add_tail(&ne->list, &nm_i->nat_entries); spin_unlock(&nm_i->nat_list_lock); - nm_i->nat_cnt++; + 
nm_i->nat_cnt[TOTAL_NAT]++; + nm_i->nat_cnt[RECLAIMABLE_NAT]++; return ne; } @@ -207,7 +208,8 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) { radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); - nm_i->nat_cnt--; + nm_i->nat_cnt[TOTAL_NAT]--; + nm_i->nat_cnt[RECLAIMABLE_NAT]--; __free_nat_entry(e); } @@ -253,7 +255,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, if (get_nat_flag(ne, IS_DIRTY)) goto refresh_list; - nm_i->dirty_nat_cnt++; + nm_i->nat_cnt[DIRTY_NAT]++; + nm_i->nat_cnt[RECLAIMABLE_NAT]--; set_nat_flag(ne, IS_DIRTY, true); refresh_list: spin_lock(&nm_i->nat_list_lock); @@ -273,7 +276,8 @@ static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, set_nat_flag(ne, IS_DIRTY, false); set->entry_cnt--; - nm_i->dirty_nat_cnt--; + nm_i->nat_cnt[DIRTY_NAT]--; + nm_i->nat_cnt[RECLAIMABLE_NAT]++; } static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, @@ -2944,14 +2948,17 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) LIST_HEAD(sets); int err = 0; - /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ + /* + * during unmount, let's flush nat_bits before checking + * nat_cnt[DIRTY_NAT]. + */ if (enabled_nat_bits(sbi, cpc)) { down_write(&nm_i->nat_tree_lock); remove_nats_in_journal(sbi); up_write(&nm_i->nat_tree_lock); } - if (!nm_i->dirty_nat_cnt) + if (!nm_i->nat_cnt[DIRTY_NAT]) return 0; down_write(&nm_i->nat_tree_lock); @@ -2962,7 +2969,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * into nat entry set. 
*/ if (enabled_nat_bits(sbi, cpc) || - !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) + !__has_cursum_space(journal, + nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL)) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, @@ -3086,7 +3094,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi) F2FS_RESERVED_NODE_NUM; nm_i->nid_cnt[FREE_NID] = 0; nm_i->nid_cnt[PREALLOC_NID] = 0; - nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; @@ -3220,7 +3227,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, natvec[idx]); } } - f2fs_bug_on(sbi, nm_i->nat_cnt); + f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]); /* destroy nat set cache */ nid = 0; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 69e5859e993cf7..f84541b57acbbc 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -126,13 +126,13 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi) { - return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid * + return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid * NM_I(sbi)->dirty_nats_ratio / 100; } static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) { - return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD; + return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD; } static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1596502f7375ca..f2a4265318f5cd 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4544,7 +4544,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) return; mutex_lock(&dirty_i->seglist_lock); - for (segno = 0; segno < MAIN_SECS(sbi); segno += blks_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { valid_blocks = get_valid_blocks(sbi, segno, true); secno = GET_SEC_FROM_SEG(sbi, 
segno); diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index d66de5999a26d8..dd3c3c7a90ec8f 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -18,9 +18,7 @@ static unsigned int shrinker_run_no; static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) { - long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt; - - return count > 0 ? count : 0; + return NM_I(sbi)->nat_cnt[RECLAIMABLE_NAT]; } static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 00eff2f5180790..aa284ce7ec00df 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2744,7 +2744,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, block_t total_sections, blocks_per_seg; struct f2fs_super_block *raw_super = (struct f2fs_super_block *) (bh->b_data + F2FS_SUPER_OFFSET); - unsigned int blocksize; size_t crc_offset = 0; __u32 crc = 0; @@ -2778,10 +2777,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } /* Currently, support only 4KB block size */ - blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); - if (blocksize != F2FS_BLKSIZE) { - f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB", - blocksize); + if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) { + f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u", + le32_to_cpu(raw_super->log_blocksize), + F2FS_BLKSIZE_BITS); return -EFSCORRUPTED; } @@ -3918,6 +3917,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_UNICODE utf8_unload(sb->s_encoding); + sb->s_encoding = NULL; #endif free_options: #ifdef CONFIG_QUOTA diff --git a/fs/fcntl.c b/fs/fcntl.c index 19ac5baad50fdc..05b36b28f2e87f 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -781,9 +781,10 @@ void send_sigio(struct fown_struct *fown, int fd, int band) { struct task_struct *p; enum pid_type type; + unsigned long flags; struct pid *pid; - read_lock(&fown->lock); + read_lock_irqsave(&fown->lock, flags); type = 
fown->pid_type; pid = fown->pid; @@ -804,7 +805,7 @@ void send_sigio(struct fown_struct *fown, int fd, int band) read_unlock(&tasklist_lock); } out_unlock_fown: - read_unlock(&fown->lock); + read_unlock_irqrestore(&fown->lock, flags); } static void send_sigurg_to_task(struct task_struct *p, @@ -819,9 +820,10 @@ int send_sigurg(struct fown_struct *fown) struct task_struct *p; enum pid_type type; struct pid *pid; + unsigned long flags; int ret = 0; - read_lock(&fown->lock); + read_lock_irqsave(&fown->lock, flags); type = fown->pid_type; pid = fown->pid; @@ -844,7 +846,7 @@ int send_sigurg(struct fown_struct *fown) read_unlock(&tasklist_lock); } out_unlock_fown: - read_unlock(&fown->lock); + read_unlock_irqrestore(&fown->lock, flags); return ret; } diff --git a/fs/file.c b/fs/file.c index 4559b5fec3bd53..21c0893f2f1df8 100644 --- a/fs/file.c +++ b/fs/file.c @@ -21,7 +21,6 @@ #include #include #include -#include unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; @@ -453,7 +452,6 @@ void exit_files(struct task_struct *tsk) struct files_struct * files = tsk->files; if (files) { - io_uring_files_cancel(files); task_lock(tsk); tsk->files = NULL; task_unlock(tsk); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e6005c78bfa93e..90dddb507e4afa 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1474,21 +1474,25 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) } /* - * Some filesystems may redirty the inode during the writeback - * due to delalloc, clear dirty metadata flags right before - * write_inode() + * If the inode has dirty timestamps and we need to write them, call + * mark_inode_dirty_sync() to notify the filesystem about it and to + * change I_DIRTY_TIME into I_DIRTY_SYNC. 
*/ - spin_lock(&inode->i_lock); - - dirty = inode->i_state & I_DIRTY; if ((inode->i_state & I_DIRTY_TIME) && - ((dirty & I_DIRTY_INODE) || - wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync || + (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync || time_after(jiffies, inode->dirtied_time_when + dirtytime_expire_interval * HZ))) { - dirty |= I_DIRTY_TIME; trace_writeback_lazytime(inode); + mark_inode_dirty_sync(inode); } + + /* + * Some filesystems may redirty the inode during the writeback + * due to delalloc, clear dirty metadata flags right before + * write_inode() + */ + spin_lock(&inode->i_lock); + dirty = inode->i_state & I_DIRTY; inode->i_state &= ~dirty; /* @@ -1509,8 +1513,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_unlock(&inode->i_lock); - if (dirty & I_DIRTY_TIME) - mark_inode_dirty_sync(inode); /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & ~I_DIRTY_PAGES) { int err = write_inode(inode, wbc); diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 5a48cee6d7d333..f529075a2ce878 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type) void *value = NULL; struct posix_acl *acl; + if (fuse_is_bad(inode)) + return ERR_PTR(-EIO); + if (!fc->posix_acl || fc->no_getxattr) return NULL; @@ -53,6 +56,9 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type) const char *name; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fc->posix_acl || fc->no_setxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ff7dbeb16f88da..ffa031fe52933f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -202,7 +202,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) int ret; inode = d_inode_rcu(entry); - if (inode && is_bad_inode(inode)) + if (inode && fuse_is_bad(inode)) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & LOOKUP_REVAL)) { @@ 
-463,6 +463,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, bool outarg_valid = true; bool locked; + if (fuse_is_bad(dir)) + return ERR_PTR(-EIO); + locked = fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); @@ -606,6 +609,9 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; + if (fuse_is_bad(dir)) + return -EIO; + if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) @@ -654,6 +660,9 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, int err; struct fuse_forget_link *forget; + if (fuse_is_bad(dir)) + return -EIO; + forget = fuse_alloc_forget(); if (!forget) return -ENOMEM; @@ -781,6 +790,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_UNLINK; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -817,6 +829,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_RMDIR; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -895,6 +910,9 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); int err; + if (fuse_is_bad(olddir)) + return -EIO; + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; @@ -1030,7 +1048,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, if (!err) { if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; } else { fuse_change_attributes(inode, &outarg.attr, @@ -1232,6 +1250,9 @@ static int fuse_permission(struct inode *inode, int mask) 
bool refreshed = false; int err = 0; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) return -EACCES; @@ -1327,7 +1348,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out_err; if (fc->cache_symlinks) @@ -1375,7 +1396,7 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (fc->no_fsyncdir) @@ -1664,7 +1685,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; goto error; } @@ -1727,6 +1748,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(get_fuse_conn(inode))) return -EACCES; @@ -1785,6 +1809,9 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) { if (!request_mask) { /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c03034e8c1529c..8b306005453ccf 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -226,6 +226,9 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) bool dax_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && FUSE_IS_DAX(inode); + if (fuse_is_bad(inode)) + return -EIO; + err = generic_file_open(inode, file); if (err) return err; @@ -463,7 +466,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) FUSE_ARGS(args); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; err = 
write_inode_now(inode, 1); @@ -535,7 +538,7 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; inode_lock(inode); @@ -859,7 +862,7 @@ static int fuse_readpage(struct file *file, struct page *page) int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; err = fuse_do_readpage(file, page); @@ -952,7 +955,7 @@ static void fuse_readahead(struct readahead_control *rac) struct fuse_conn *fc = get_fuse_conn(inode); unsigned int i, max_pages, nr_pages = 0; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return; max_pages = min_t(unsigned int, fc->max_pages, @@ -1555,7 +1558,7 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -1573,7 +1576,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -2172,7 +2175,7 @@ static int fuse_writepages(struct address_space *mapping, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; data.inode = inode; @@ -2954,7 +2957,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, if (!fuse_allow_current_process(fc)) return -EACCES; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; return fuse_do_ioctl(file, cmd, arg, flags); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d51598017d1330..404d66f01e8d7b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -172,6 +172,8 @@ enum { FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, + /* Bad inode */ + FUSE_I_BAD, }; 
struct fuse_conn; @@ -858,6 +860,16 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) return atomic64_read(&fc->attr_version); } +static inline void fuse_make_bad(struct inode *inode) +{ + set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); +} + +static inline bool fuse_is_bad(struct inode *inode) +{ + return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); +} + /** Device operations */ extern const struct file_operations fuse_dev_operations; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1a47afc95f8000..f94b0bb57619ca 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -132,7 +132,7 @@ static void fuse_evict_inode(struct inode *inode) fi->forget = NULL; } } - if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { + if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } @@ -342,7 +342,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, unlock_new_inode(inode); } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { /* Inode has changed type, any I/O on the old should fail */ - make_bad_inode(inode); + fuse_make_bad(inode); iput(inode); goto retry; } diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 3b5e91045871a1..3441ffa740f3d7 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -207,7 +207,7 @@ static int fuse_direntplus_link(struct file *file, dput(dentry); goto retry; } - if (is_bad_inode(inode)) { + if (fuse_is_bad(inode)) { dput(dentry); return -EIO; } @@ -568,7 +568,7 @@ int fuse_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; mutex_lock(&ff->readdir.lock); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 21a9e534417c07..d2c0e58c6416f8 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1464,6 +1464,8 @@ static int virtio_fs_get_tree(struct fs_context *fsc) if (!sb->s_root) { err = 
virtio_fs_fill_super(sb, fsc); if (err) { + fuse_mount_put(fm); + sb->s_fs_info = NULL; deactivate_locked_super(sb); return err; } diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 371bdcbc723372..cdea18de94f7e8 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -113,6 +113,9 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fm->fc)) return -EACCES; @@ -178,6 +181,9 @@ static int fuse_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { + if (fuse_is_bad(inode)) + return -EIO; + return fuse_getxattr(inode, name, value, size); } @@ -186,6 +192,9 @@ static int fuse_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { + if (fuse_is_bad(inode)) + return -EIO; + if (!value) return fuse_removexattr(inode, name); diff --git a/fs/inode.c b/fs/inode.c index 9d78c37b00b817..5eea9912a0b9d8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1627,7 +1627,9 @@ static void iput_final(struct inode *inode) else drop = generic_drop_inode(inode); - if (!drop && (sb->s_flags & SB_ACTIVE)) { + if (!drop && + !(inode->i_state & I_DONTCACHE) && + (sb->s_flags & SB_ACTIVE)) { inode_add_lru(inode); spin_unlock(&inode->i_lock); return; diff --git a/fs/io-wq.h b/fs/io-wq.h index cba36f03c3555a..aaa363f3589167 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -59,6 +59,7 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node, list->last->next = node; list->last = node; } + node->next = NULL; } static inline void wq_list_cut(struct io_wq_work_list *list, diff --git a/fs/io_uring.c b/fs/io_uring.c index 86dac2b2e27637..907ecaffc33868 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -260,6 +260,7 @@ struct io_ring_ctx { unsigned int drain_next: 1; unsigned int eventfd_async: 1; unsigned int restricted: 1; + 
unsigned int sqo_dead: 1; /* * Ring buffer of indices into array of io_uring_sqe, which is @@ -353,6 +354,7 @@ struct io_ring_ctx { unsigned cq_entries; unsigned cq_mask; atomic_t cq_timeouts; + unsigned cq_last_tm_flush; unsigned long cq_check_overflow; struct wait_queue_head cq_wait; struct fasync_struct *cq_fasync; @@ -941,6 +943,10 @@ enum io_mem_account { ACCT_PINNED, }; +static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node); +static struct fixed_file_ref_node *alloc_fixed_file_ref_node( + struct io_ring_ctx *ctx); + static void __io_complete_rw(struct io_kiocb *req, long res, long res2, struct io_comp_state *cs); static void io_cqring_fill_event(struct io_kiocb *req, long res); @@ -965,6 +971,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, const struct iovec *fast_iov, struct iov_iter *iter, bool force); +static void io_req_drop_files(struct io_kiocb *req); +static void io_req_task_queue(struct io_kiocb *req); static struct kmem_cache *req_cachep; @@ -985,8 +993,7 @@ EXPORT_SYMBOL(io_uring_get_socket); static inline void io_clean_op(struct io_kiocb *req) { - if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED | - REQ_F_INFLIGHT)) + if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED)) __io_clean_op(req); } @@ -1005,6 +1012,8 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) { struct mm_struct *mm; + if (current->flags & PF_EXITING) + return -EFAULT; if (current->mm) return 0; @@ -1206,11 +1215,6 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) /* order cqe stores with ring update */ smp_store_release(&rings->cq.tail, ctx->cached_cq_tail); - - if (wq_has_sleeper(&ctx->cq_wait)) { - wake_up_interruptible(&ctx->cq_wait); - kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); - } } static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req) @@ -1253,6 +1257,8 @@ static void 
io_req_clean_work(struct io_kiocb *req) free_fs_struct(fs); req->work.flags &= ~IO_WQ_WORK_FS; } + if (req->flags & REQ_F_INFLIGHT) + io_req_drop_files(req); io_put_identity(req->task->io_uring, req); } @@ -1369,6 +1375,13 @@ static bool io_grab_identity(struct io_kiocb *req) spin_unlock_irq(&ctx->inflight_lock); req->work.flags |= IO_WQ_WORK_FILES; } + if (!(req->work.flags & IO_WQ_WORK_MM) && + (def->work_flags & IO_WQ_WORK_MM)) { + if (id->mm != current->mm) + return false; + mmgrab(id->mm); + req->work.flags |= IO_WQ_WORK_MM; + } return true; } @@ -1393,13 +1406,6 @@ static void io_prep_async_work(struct io_kiocb *req) req->work.flags |= IO_WQ_WORK_UNBOUND; } - /* ->mm can never change on us */ - if (!(req->work.flags & IO_WQ_WORK_MM) && - (def->work_flags & IO_WQ_WORK_MM)) { - mmgrab(id->mm); - req->work.flags |= IO_WQ_WORK_MM; - } - /* if we fail grabbing identity, we must COW, regrab, and retry */ if (io_grab_identity(req)) return; @@ -1497,37 +1503,49 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) do { struct io_defer_entry *de = list_first_entry(&ctx->defer_list, struct io_defer_entry, list); - struct io_kiocb *link; if (req_need_defer(de->req, de->seq)) break; list_del_init(&de->list); - /* punt-init is done before queueing for defer */ - link = __io_queue_async_work(de->req); - if (link) { - __io_queue_linked_timeout(link); - /* drop submission reference */ - io_put_req_deferred(link, 1); - } + io_req_task_queue(de->req); kfree(de); } while (!list_empty(&ctx->defer_list)); } static void io_flush_timeouts(struct io_ring_ctx *ctx) { - while (!list_empty(&ctx->timeout_list)) { + u32 seq; + + if (list_empty(&ctx->timeout_list)) + return; + + seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + + do { + u32 events_needed, events_got; struct io_kiocb *req = list_first_entry(&ctx->timeout_list, struct io_kiocb, timeout.list); if (io_is_timeout_noseq(req)) break; - if (req->timeout.target_seq != ctx->cached_cq_tail - - 
atomic_read(&ctx->cq_timeouts)) + + /* + * Since seq can easily wrap around over time, subtract + * the last seq at which timeouts were flushed before comparing. + * Assuming not more than 2^31-1 events have happened since, + * these subtractions won't have wrapped, so we can check if + * target is in [last_seq, current_seq] by comparing the two. + */ + events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush; + events_got = seq - ctx->cq_last_tm_flush; + if (events_got < events_needed) break; list_del_init(&req->timeout.list); io_kill_timeout(req); - } + } while (!list_empty(&ctx->timeout_list)); + + ctx->cq_last_tm_flush = seq; } static void io_commit_cqring(struct io_ring_ctx *ctx) @@ -1577,6 +1595,10 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) static void io_cqring_ev_posted(struct io_ring_ctx *ctx) { + if (wq_has_sleeper(&ctx->cq_wait)) { + wake_up_interruptible(&ctx->cq_wait); + kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); + } if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait)) @@ -1621,9 +1643,9 @@ static bool io_match_files(struct io_kiocb *req, } /* Returns true if there are no backlogged entries after the flush */ -static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, - struct task_struct *tsk, - struct files_struct *files) +static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, + struct task_struct *tsk, + struct files_struct *files) { struct io_rings *rings = ctx->rings; struct io_kiocb *req, *tmp; @@ -1632,8 +1654,6 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, LIST_HEAD(list); if (!force) { - if (list_empty_careful(&ctx->cq_overflow_list)) - return true; if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) == rings->cq_ring_entries)) return false; @@ -1641,10 +1661,6 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, spin_lock_irqsave(&ctx->completion_lock, 
flags); - /* if force is set, the ring is going away. always drop after that */ - if (force) - ctx->cq_overflow_flushed = 1; - cqe = NULL; list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { if (tsk && req->task != tsk) @@ -1683,6 +1699,20 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, return cqe != NULL; } +static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, + struct task_struct *tsk, + struct files_struct *files) +{ + if (test_bit(0, &ctx->cq_check_overflow)) { + /* iopoll syncs against uring_lock, not completion_lock */ + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_lock(&ctx->uring_lock); + __io_cqring_overflow_flush(ctx, force, tsk, files); + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_unlock(&ctx->uring_lock); + } +} + static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) { struct io_ring_ctx *ctx = req->ctx; @@ -2050,13 +2080,12 @@ static void __io_req_task_submit(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - if (!__io_sq_thread_acquire_mm(ctx)) { - mutex_lock(&ctx->uring_lock); + mutex_lock(&ctx->uring_lock); + if (!ctx->sqo_dead && !__io_sq_thread_acquire_mm(ctx)) __io_queue_sqe(req, NULL); - mutex_unlock(&ctx->uring_lock); - } else { + else __io_req_task_cancel(req, -EFAULT); - } + mutex_unlock(&ctx->uring_lock); } static void io_req_task_submit(struct callback_head *cb) @@ -2132,6 +2161,8 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx, struct io_uring_task *tctx = rb->task->io_uring; percpu_counter_sub(&tctx->inflight, rb->task_refs); + if (atomic_read(&tctx->in_idle)) + wake_up(&tctx->wait); put_task_struct_many(rb->task, rb->task_refs); rb->task = NULL; } @@ -2151,6 +2182,8 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) struct io_uring_task *tctx = rb->task->io_uring; percpu_counter_sub(&tctx->inflight, rb->task_refs); + if (atomic_read(&tctx->in_idle)) + wake_up(&tctx->wait); 
put_task_struct_many(rb->task, rb->task_refs); } rb->task = req->task; @@ -2236,22 +2269,10 @@ static void io_double_put_req(struct io_kiocb *req) io_free_req(req); } -static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) +static unsigned io_cqring_events(struct io_ring_ctx *ctx) { struct io_rings *rings = ctx->rings; - if (test_bit(0, &ctx->cq_check_overflow)) { - /* - * noflush == true is from the waitqueue handler, just ensure - * we wake up the task, and the next invocation will flush the - * entries. We cannot safely to it from here. - */ - if (noflush && !list_empty(&ctx->cq_overflow_list)) - return -1U; - - io_cqring_overflow_flush(ctx, false, NULL, NULL); - } - /* See comment at the top of this file */ smp_rmb(); return ctx->cached_cq_tail - READ_ONCE(rings->cq.head); @@ -2476,7 +2497,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) * If we do, we can potentially be spinning for commands that * already triggered a CQE (eg in error). */ - if (io_cqring_events(ctx, false)) + if (test_bit(0, &ctx->cq_check_overflow)) + __io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx)) break; /* @@ -3052,9 +3075,7 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, iov[0].iov_len = kbuf->len; return 0; } - if (!req->rw.len) - return 0; - else if (req->rw.len > 1) + if (req->rw.len != 1) return -EINVAL; #ifdef CONFIG_COMPAT @@ -3434,7 +3455,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, /* read it all, or we did blocking attempt. no retry. 
*/ if (!iov_iter_count(iter) || !force_nonblock || - (req->file->f_flags & O_NONBLOCK)) + (req->file->f_flags & O_NONBLOCK) || !(req->flags & REQ_F_ISREG)) goto done; io_size -= ret; @@ -3948,11 +3969,17 @@ static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock, head = idr_find(&ctx->io_buffer_idr, p->bgid); if (head) ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs); - - io_ring_submit_lock(ctx, !force_nonblock); if (ret < 0) req_set_fail_links(req); - __io_req_complete(req, ret, 0, cs); + + /* need to hold the lock to complete IOPOLL requests */ + if (ctx->flags & IORING_SETUP_IOPOLL) { + __io_req_complete(req, ret, 0, cs); + io_ring_submit_unlock(ctx, !force_nonblock); + } else { + io_ring_submit_unlock(ctx, !force_nonblock); + __io_req_complete(req, ret, 0, cs); + } return 0; } @@ -4037,10 +4064,17 @@ static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock, } } out: - io_ring_submit_unlock(ctx, !force_nonblock); if (ret < 0) req_set_fail_links(req); - __io_req_complete(req, ret, 0, cs); + + /* need to hold the lock to complete IOPOLL requests */ + if (ctx->flags & IORING_SETUP_IOPOLL) { + __io_req_complete(req, ret, 0, cs); + io_ring_submit_unlock(ctx, !force_nonblock); + } else { + io_ring_submit_unlock(ctx, !force_nonblock); + __io_req_complete(req, ret, 0, cs); + } return 0; } @@ -4210,7 +4244,6 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) * io_wq_work.flags, so initialize io_wq_work firstly. 
*/ io_req_init_async(req); - req->work.flags |= IO_WQ_WORK_NO_CANCEL; if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) return -EINVAL; @@ -4243,6 +4276,8 @@ static int io_close(struct io_kiocb *req, bool force_nonblock, /* if the file has a flush method, be safe and punt to async */ if (close->put_file->f_op->flush && force_nonblock) { + /* not safe to cancel at this point */ + req->work.flags |= IO_WQ_WORK_NO_CANCEL; /* was never set, but play safe */ req->flags &= ~REQ_F_NOWAIT; /* avoid grabbing files - we don't need the files */ @@ -5566,6 +5601,12 @@ static int io_timeout(struct io_kiocb *req) tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); req->timeout.target_seq = tail + off; + /* Update the last seq here in case io_flush_timeouts() hasn't. + * This is safe because ->completion_lock is held, and submissions + * and completions are never mixed in the same ->completion_lock section. + */ + ctx->cq_last_tm_flush = tail; + /* * Insertion sort, ensuring the first entry in the list is always * the one we need first. 
@@ -5854,15 +5895,15 @@ static void io_req_drop_files(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; unsigned long flags; + put_files_struct(req->work.identity->files); + put_nsproxy(req->work.identity->nsproxy); spin_lock_irqsave(&ctx->inflight_lock, flags); list_del(&req->inflight_entry); - if (waitqueue_active(&ctx->inflight_wait)) - wake_up(&ctx->inflight_wait); spin_unlock_irqrestore(&ctx->inflight_lock, flags); req->flags &= ~REQ_F_INFLIGHT; - put_files_struct(req->work.identity->files); - put_nsproxy(req->work.identity->nsproxy); req->work.flags &= ~IO_WQ_WORK_FILES; + if (waitqueue_active(&ctx->inflight_wait)) + wake_up(&ctx->inflight_wait); } static void __io_clean_op(struct io_kiocb *req) @@ -5915,9 +5956,6 @@ static void __io_clean_op(struct io_kiocb *req) } req->flags &= ~REQ_F_NEED_CLEANUP; } - - if (req->flags & REQ_F_INFLIGHT) - io_req_drop_files(req); } static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock, @@ -6074,8 +6112,28 @@ static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work) } if (ret) { + struct io_ring_ctx *lock_ctx = NULL; + + if (req->ctx->flags & IORING_SETUP_IOPOLL) + lock_ctx = req->ctx; + + /* + * io_iopoll_complete() does not hold completion_lock to + * complete polled io, so here for polled io, we can not call + * io_req_complete() directly, otherwise there maybe concurrent + * access to cqring, defer_list, etc, which is not safe. Given + * that io_iopoll_complete() is always called under uring_lock, + * so here for polled io, we also get uring_lock to complete + * it. 
+ */ + if (lock_ctx) + mutex_lock(&lock_ctx->uring_lock); + req_set_fail_links(req); io_req_complete(req, ret); + + if (lock_ctx) + mutex_unlock(&lock_ctx->uring_lock); } return io_steal_work(req); @@ -6548,8 +6606,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) /* if we have a backlog and couldn't flush it all, return BUSY */ if (test_bit(0, &ctx->sq_check_overflow)) { - if (!list_empty(&ctx->cq_overflow_list) && - !io_cqring_overflow_flush(ctx, false, NULL, NULL)) + if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL)) return -EBUSY; } @@ -6730,7 +6787,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx, to_submit = 8; mutex_lock(&ctx->uring_lock); - if (likely(!percpu_ref_is_dying(&ctx->refs))) + if (likely(!percpu_ref_is_dying(&ctx->refs) && !ctx->sqo_dead)) ret = io_submit_sqes(ctx, to_submit); mutex_unlock(&ctx->uring_lock); @@ -6771,8 +6828,16 @@ static int io_sq_thread(void *data) * kthread parking. This synchronizes the thread vs users, * the users are synchronized on the sqd->ctx_lock. */ - if (kthread_should_park()) + if (kthread_should_park()) { kthread_parkme(); + /* + * When sq thread is unparked, in case the previous park operation + * comes from io_put_sq_data(), which means that sq thread is going + * to be stopped, so here needs to have a check. 
+ */ + if (kthread_should_stop()) + break; + } if (unlikely(!list_empty(&sqd->ctx_new_list))) io_sqd_init_new(sqd); @@ -6798,6 +6863,7 @@ static int io_sq_thread(void *data) if (ret & SQT_SPIN) { io_run_task_work(); + io_sq_thread_drop_mm(); cond_resched(); } else if (ret == SQT_IDLE) { if (kthread_should_park()) @@ -6812,6 +6878,7 @@ static int io_sq_thread(void *data) } io_run_task_work(); + io_sq_thread_drop_mm(); if (cur_css) io_sq_thread_unassociate_blkcg(); @@ -6830,7 +6897,7 @@ struct io_wait_queue { unsigned nr_timeouts; }; -static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush) +static inline bool io_should_wake(struct io_wait_queue *iowq) { struct io_ring_ctx *ctx = iowq->ctx; @@ -6839,7 +6906,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush) * started waiting. For timeouts, we always want to return to userspace, * regardless of event count. */ - return io_cqring_events(ctx, noflush) >= iowq->to_wait || + return io_cqring_events(ctx) >= iowq->to_wait || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; } @@ -6849,11 +6916,13 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, wq); - /* use noflush == true, as we can't safely rely on locking context */ - if (!io_should_wake(iowq, true)) - return -1; - - return autoremove_wake_function(curr, mode, wake_flags, key); + /* + * Cannot safely flush overflowed CQEs from here, ensure we wake up + * the task, and the next invocation will do it. 
+ */ + if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow)) + return autoremove_wake_function(curr, mode, wake_flags, key); + return -1; } static int io_run_task_work_sig(void) @@ -6892,7 +6961,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, int ret = 0; do { - if (io_cqring_events(ctx, false) >= min_events) + io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx) >= min_events) return 0; if (!io_run_task_work()) break; @@ -6914,6 +6984,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); trace_io_uring_cqring_wait(ctx, min_events); do { + io_cqring_overflow_flush(ctx, false, NULL, NULL); prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, TASK_INTERRUPTIBLE); /* make sure we run task_work before checking for signals */ @@ -6922,8 +6993,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, continue; else if (ret < 0) break; - if (io_should_wake(&iowq, false)) + if (io_should_wake(&iowq)) break; + if (test_bit(0, &ctx->cq_check_overflow)) + continue; schedule(); } while (1); finish_wait(&ctx->wait, &iowq.wq); @@ -6964,18 +7037,32 @@ static void io_file_ref_kill(struct percpu_ref *ref) complete(&data->done); } +static void io_sqe_files_set_node(struct fixed_file_data *file_data, + struct fixed_file_ref_node *ref_node) +{ + spin_lock_bh(&file_data->lock); + file_data->node = ref_node; + list_add_tail(&ref_node->node, &file_data->ref_list); + spin_unlock_bh(&file_data->lock); + percpu_ref_get(&file_data->refs); +} + static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { struct fixed_file_data *data = ctx->file_data; - struct fixed_file_ref_node *ref_node = NULL; + struct fixed_file_ref_node *backup_node, *ref_node = NULL; unsigned nr_tables, i; + int ret; if (!data) return -ENXIO; + backup_node = alloc_fixed_file_ref_node(ctx); + if (!backup_node) + return -ENOMEM; - spin_lock(&data->lock); + 
spin_lock_bh(&data->lock); ref_node = data->node; - spin_unlock(&data->lock); + spin_unlock_bh(&data->lock); if (ref_node) percpu_ref_kill(&ref_node->refs); @@ -6983,7 +7070,18 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) /* wait for all refs nodes to complete */ flush_delayed_work(&ctx->file_put_work); - wait_for_completion(&data->done); + do { + ret = wait_for_completion_interruptible(&data->done); + if (!ret) + break; + ret = io_run_task_work_sig(); + if (ret < 0) { + percpu_ref_resurrect(&data->refs); + reinit_completion(&data->done); + io_sqe_files_set_node(data, backup_node); + return ret; + } + } while (1); __io_sqe_files_unregister(ctx); nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE); @@ -6994,6 +7092,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) kfree(data); ctx->file_data = NULL; ctx->nr_user_files = 0; + destroy_fixed_file_ref_node(backup_node); return 0; } @@ -7358,7 +7457,7 @@ static void io_file_data_ref_zero(struct percpu_ref *ref) data = ref_node->file_data; ctx = data->ctx; - spin_lock(&data->lock); + spin_lock_bh(&data->lock); ref_node->done = true; while (!list_empty(&data->ref_list)) { @@ -7370,7 +7469,7 @@ static void io_file_data_ref_zero(struct percpu_ref *ref) list_del(&ref_node->node); first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist); } - spin_unlock(&data->lock); + spin_unlock_bh(&data->lock); if (percpu_ref_is_dying(&data->refs)) delay = 0; @@ -7388,12 +7487,12 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node( ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); if (!ref_node) - return ERR_PTR(-ENOMEM); + return NULL; if (percpu_ref_init(&ref_node->refs, io_file_data_ref_zero, 0, GFP_KERNEL)) { kfree(ref_node); - return ERR_PTR(-ENOMEM); + return NULL; } INIT_LIST_HEAD(&ref_node->node); INIT_LIST_HEAD(&ref_node->file_list); @@ -7487,16 +7586,12 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, } ref_node = 
alloc_fixed_file_ref_node(ctx); - if (IS_ERR(ref_node)) { + if (!ref_node) { io_sqe_files_unregister(ctx); - return PTR_ERR(ref_node); + return -ENOMEM; } - file_data->node = ref_node; - spin_lock(&file_data->lock); - list_add_tail(&ref_node->node, &file_data->ref_list); - spin_unlock(&file_data->lock); - percpu_ref_get(&file_data->refs); + io_sqe_files_set_node(file_data, ref_node); return ret; out_fput: for (i = 0; i < ctx->nr_user_files; i++) { @@ -7593,8 +7688,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, return -EINVAL; ref_node = alloc_fixed_file_ref_node(ctx); - if (IS_ERR(ref_node)) - return PTR_ERR(ref_node); + if (!ref_node) + return -ENOMEM; done = 0; fds = u64_to_user_ptr(up->fds); @@ -7652,11 +7747,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (needs_switch) { percpu_ref_kill(&data->node->refs); - spin_lock(&data->lock); - list_add_tail(&ref_node->node, &data->ref_list); - data->node = ref_node; - spin_unlock(&data->lock); - percpu_ref_get(&ctx->file_data->refs); + io_sqe_files_set_node(data, ref_node); } else destroy_fixed_file_ref_node(ref_node); @@ -8330,7 +8421,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) smp_rmb(); if (!io_sqring_full(ctx)) mask |= EPOLLOUT | EPOLLWRNORM; - if (io_cqring_events(ctx, false)) + io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx)) mask |= EPOLLIN | EPOLLRDNORM; return mask; @@ -8369,28 +8461,39 @@ static void io_ring_exit_work(struct work_struct *work) * as nobody else will be looking for them. 
*/ do { - if (ctx->rings) - io_cqring_overflow_flush(ctx, true, NULL, NULL); io_iopoll_try_reap_events(ctx); } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); io_ring_ctx_free(ctx); } +static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + + return req->ctx == data; +} + static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); + /* if force is set, the ring is going away. always drop after that */ + + if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead)) + ctx->sqo_dead = 1; + + ctx->cq_overflow_flushed = 1; + if (ctx->rings) + __io_cqring_overflow_flush(ctx, true, NULL, NULL); mutex_unlock(&ctx->uring_lock); io_kill_timeouts(ctx, NULL); io_poll_remove_all(ctx, NULL); if (ctx->io_wq) - io_wq_cancel_all(ctx->io_wq); + io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true); /* if we failed setting up the ctx, we might not have any rings */ - if (ctx->rings) - io_cqring_overflow_flush(ctx, true, NULL, NULL); io_iopoll_try_reap_events(ctx); idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx); @@ -8421,14 +8524,6 @@ static int io_uring_release(struct inode *inode, struct file *file) return 0; } -static bool io_wq_files_match(struct io_wq_work *work, void *data) -{ - struct files_struct *files = data; - - return !files || ((work->flags & IO_WQ_WORK_FILES) && - work->identity->files == files); -} - /* * Returns true if 'preq' is the link parent of 'req' */ @@ -8566,21 +8661,20 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, * Returns true if we found and killed one or more files pinning requests */ static bool io_uring_cancel_files(struct io_ring_ctx *ctx, + struct task_struct *task, struct files_struct *files) { if (list_empty_careful(&ctx->inflight_list)) return false; - /* cancel all at once, should be faster than doing it one by one*/ - 
io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true); - while (!list_empty_careful(&ctx->inflight_list)) { struct io_kiocb *cancel_req = NULL, *req; DEFINE_WAIT(wait); spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { - if (files && (req->work.flags & IO_WQ_WORK_FILES) && + if (req->task == task && + (req->work.flags & IO_WQ_WORK_FILES) && req->work.identity->files != files) continue; /* req is being completed, ignore */ @@ -8599,6 +8693,8 @@ static bool io_uring_cancel_files(struct io_ring_ctx *ctx, break; /* cancel this request, or head link requests */ io_attempt_cancel(ctx, cancel_req); + io_cqring_overflow_flush(ctx, true, task, files); + io_put_req(cancel_req); /* cancellations _may_ trigger task work */ io_run_task_work(); @@ -8623,7 +8719,7 @@ static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, { bool ret; - ret = io_uring_cancel_files(ctx, files); + ret = io_uring_cancel_files(ctx, task, files); if (!files) { enum io_wq_cancel cret; @@ -8646,6 +8742,17 @@ static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, return ret; } +static void io_disable_sqo_submit(struct io_ring_ctx *ctx) +{ + mutex_lock(&ctx->uring_lock); + ctx->sqo_dead = 1; + mutex_unlock(&ctx->uring_lock); + + /* make sure callers enter the ring to get error */ + if (ctx->rings) + io_ring_set_wakeup_flag(ctx); +} + /* * We need to iteratively cancel requests, in case a request has dependent * hard links. 
These persist even for failure of cancelations, hence keep @@ -8657,16 +8764,15 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, struct task_struct *task = current; if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { + /* for SQPOLL only sqo_task has task notes */ + WARN_ON_ONCE(ctx->sqo_task != current); + io_disable_sqo_submit(ctx); task = ctx->sq_data->thread; atomic_inc(&task->io_uring->in_idle); io_sq_thread_park(ctx->sq_data); } - if (files) - io_cancel_defer_files(ctx, NULL, files); - else - io_cancel_defer_files(ctx, task, NULL); - + io_cancel_defer_files(ctx, task, files); io_cqring_overflow_flush(ctx, true, task, files); while (__io_uring_cancel_task_requests(ctx, task, files)) { @@ -8692,10 +8798,9 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) { struct io_uring_task *tctx = current->io_uring; + int ret; if (unlikely(!tctx)) { - int ret; - ret = io_uring_alloc_task_context(current); if (unlikely(ret)) return ret; @@ -8706,7 +8811,12 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) if (!old) { get_file(file); - xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL); + ret = xa_err(xa_store(&tctx->xa, (unsigned long)file, + file, GFP_KERNEL)); + if (ret) { + fput(file); + return ret; + } } tctx->last = file; } @@ -8736,20 +8846,12 @@ static void io_uring_del_task_file(struct file *file) fput(file); } -/* - * Drop task note for this file if we're the only ones that hold it after - * pending fput() - */ -static void io_uring_attempt_task_drop(struct file *file) +static void io_uring_remove_task_files(struct io_uring_task *tctx) { - if (!current->io_uring) - return; - /* - * fput() is pending, will be 2 if the only other ref is our potential - * task file note. If the task is exiting, drop regardless of count. 
- */ - if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || - atomic_long_read(&file->f_count) == 2) + struct file *file; + unsigned long index; + + xa_for_each(&tctx->xa, index, file) io_uring_del_task_file(file); } @@ -8761,16 +8863,12 @@ void __io_uring_files_cancel(struct files_struct *files) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); - - xa_for_each(&tctx->xa, index, file) { - struct io_ring_ctx *ctx = file->private_data; - - io_uring_cancel_task_requests(ctx, files); - if (files) - io_uring_del_task_file(file); - } - + xa_for_each(&tctx->xa, index, file) + io_uring_cancel_task_requests(file->private_data, files); atomic_dec(&tctx->in_idle); + + if (files) + io_uring_remove_task_files(tctx); } static s64 tctx_inflight(struct io_uring_task *tctx) @@ -8813,6 +8911,10 @@ void __io_uring_task_cancel(void) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); + /* trigger io_disable_sqo_submit() */ + if (tctx->sqpoll) + __io_uring_files_cancel(NULL); + do { /* read completions before cancelations */ inflight = tctx_inflight(tctx); @@ -8833,11 +8935,42 @@ void __io_uring_task_cancel(void) finish_wait(&tctx->wait, &wait); atomic_dec(&tctx->in_idle); + + io_uring_remove_task_files(tctx); } static int io_uring_flush(struct file *file, void *data) { - io_uring_attempt_task_drop(file); + struct io_uring_task *tctx = current->io_uring; + struct io_ring_ctx *ctx = file->private_data; + + if (!tctx) + return 0; + + /* we should have cancelled and erased it before PF_EXITING */ + WARN_ON_ONCE((current->flags & PF_EXITING) && + xa_load(&tctx->xa, (unsigned long)file)); + + /* + * fput() is pending, will be 2 if the only other ref is our potential + * task file note. If the task is exiting, drop regardless of count. 
+ */ + if (atomic_long_read(&file->f_count) != 2) + return 0; + + if (ctx->flags & IORING_SETUP_SQPOLL) { + /* there is only one file note, which is owned by sqo_task */ + WARN_ON_ONCE(ctx->sqo_task != current && + xa_load(&tctx->xa, (unsigned long)file)); + /* sqo_dead check is for when this happens after cancellation */ + WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead && + !xa_load(&tctx->xa, (unsigned long)file)); + + io_disable_sqo_submit(ctx); + } + + if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current) + io_uring_del_task_file(file); return 0; } @@ -8911,8 +9044,9 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, #endif /* !CONFIG_MMU */ -static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) +static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) { + int ret = 0; DEFINE_WAIT(wait); do { @@ -8921,6 +9055,11 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); + if (unlikely(ctx->sqo_dead)) { + ret = -EOWNERDEAD; + goto out; + } + if (!io_sqring_full(ctx)) break; @@ -8928,6 +9067,8 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); +out: + return ret; } SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, @@ -8969,12 +9110,18 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ ret = 0; if (ctx->flags & IORING_SETUP_SQPOLL) { - if (!list_empty_careful(&ctx->cq_overflow_list)) - io_cqring_overflow_flush(ctx, false, NULL, NULL); + io_cqring_overflow_flush(ctx, false, NULL, NULL); + + ret = -EOWNERDEAD; + if (unlikely(ctx->sqo_dead)) + goto out; if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); - if (flags & IORING_ENTER_SQ_WAIT) - io_sqpoll_wait_sq(ctx); + if (flags & IORING_ENTER_SQ_WAIT) { + ret = io_sqpoll_wait_sq(ctx); + if (ret) + goto out; + } submitted = to_submit; } else if (to_submit) { ret = 
io_uring_add_task_file(ctx, f.file); @@ -9173,55 +9320,52 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, return 0; } +static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) +{ + int ret, fd; + + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); + if (fd < 0) + return fd; + + ret = io_uring_add_task_file(ctx, file); + if (ret) { + put_unused_fd(fd); + return ret; + } + fd_install(fd, file); + return fd; +} + /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, * we have to tie this fd to a socket for file garbage collection purposes. */ -static int io_uring_get_fd(struct io_ring_ctx *ctx) +static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { struct file *file; +#if defined(CONFIG_UNIX) int ret; - int fd; -#if defined(CONFIG_UNIX) ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, &ctx->ring_sock); if (ret) - return ret; + return ERR_PTR(ret); #endif - ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC); - if (ret < 0) - goto err; - fd = ret; - file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC); +#if defined(CONFIG_UNIX) if (IS_ERR(file)) { - put_unused_fd(fd); - ret = PTR_ERR(file); - goto err; + sock_release(ctx->ring_sock); + ctx->ring_sock = NULL; + } else { + ctx->ring_sock->file = file; } - -#if defined(CONFIG_UNIX) - ctx->ring_sock->file = file; #endif - ret = io_uring_add_task_file(ctx, file); - if (ret) { - fput(file); - put_unused_fd(fd); - goto err; - } - fd_install(fd, file); - return fd; -err: -#if defined(CONFIG_UNIX) - sock_release(ctx->ring_sock); - ctx->ring_sock = NULL; -#endif - return ret; + return file; } static int io_uring_create(unsigned entries, struct io_uring_params *p, @@ -9229,6 +9373,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, { struct user_struct *user = NULL; 
struct io_ring_ctx *ctx; + struct file *file; bool limit_mem; int ret; @@ -9375,17 +9520,28 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, goto err; } + file = io_uring_get_file(ctx); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err; + } + /* * Install ring fd as the very last thing, so we don't risk someone * having closed it before we finish setup */ - ret = io_uring_get_fd(ctx); - if (ret < 0) - goto err; + ret = io_uring_install_fd(ctx, file); + if (ret < 0) { + io_disable_sqo_submit(ctx); + /* fput will clean it up */ + fput(file); + return ret; + } trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: + io_disable_sqo_submit(ctx); io_ring_ctx_wait_and_kill(ctx); return ret; } diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 778275f48a8795..5a7091746f68b3 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -38,6 +38,7 @@ struct jffs2_mount_opts { * users. This is implemented simply by means of not allowing the * latter users to write to the file system if the amount if the * available space is less then 'rp_size'. 
*/ + bool set_rp_size; unsigned int rp_size; }; diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 2f6f0b140c05aa..03b4f99614bef8 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -672,6 +672,22 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r jffs2_free_full_dirent(fd); return -EIO; } + +#ifdef CONFIG_JFFS2_SUMMARY + /* + * we use CONFIG_JFFS2_SUMMARY because without it, we + * have checked it while mounting + */ + crc = crc32(0, fd->name, rd->nsize); + if (unlikely(crc != je32_to_cpu(rd->name_crc))) { + JFFS2_NOTICE("name CRC failed on dirent node at" + "%#08x: read %#08x,calculated %#08x\n", + ref_offset(ref), je32_to_cpu(rd->node_crc), crc); + jffs2_mark_node_obsolete(c, ref); + jffs2_free_full_dirent(fd); + return 0; + } +#endif } fd->nhash = full_name_hash(NULL, fd->name, rd->nsize); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 05d7878dfad154..81ca58c10b728c 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -88,7 +88,7 @@ static int jffs2_show_options(struct seq_file *s, struct dentry *root) if (opts->override_compr) seq_printf(s, ",compr=%s", jffs2_compr_name(opts->compr)); - if (opts->rp_size) + if (opts->set_rp_size) seq_printf(s, ",rp_size=%u", opts->rp_size / 1024); return 0; @@ -202,11 +202,8 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_rp_size: if (result.uint_32 > UINT_MAX / 1024) return invalf(fc, "jffs2: rp_size unrepresentable"); - opt = result.uint_32 * 1024; - if (opt > c->mtd->size) - return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB", - c->mtd->size / 1024); - c->mount_opts.rp_size = opt; + c->mount_opts.rp_size = result.uint_32 * 1024; + c->mount_opts.set_rp_size = true; break; default: return -EINVAL; @@ -215,11 +212,30 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param) return 0; } +static inline void jffs2_update_mount_opts(struct fs_context *fc) +{ + struct 
jffs2_sb_info *new_c = fc->s_fs_info; + struct jffs2_sb_info *c = JFFS2_SB_INFO(fc->root->d_sb); + + mutex_lock(&c->alloc_sem); + if (new_c->mount_opts.override_compr) { + c->mount_opts.override_compr = new_c->mount_opts.override_compr; + c->mount_opts.compr = new_c->mount_opts.compr; + } + if (new_c->mount_opts.set_rp_size) { + c->mount_opts.set_rp_size = new_c->mount_opts.set_rp_size; + c->mount_opts.rp_size = new_c->mount_opts.rp_size; + } + mutex_unlock(&c->alloc_sem); +} + static int jffs2_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; sync_filesystem(sb); + jffs2_update_mount_opts(fc); + return jffs2_do_remount_fs(sb, fc); } @@ -249,6 +265,10 @@ static int jffs2_fill_super(struct super_block *sb, struct fs_context *fc) c->mtd = sb->s_mtd; c->os_priv = sb; + if (c->mount_opts.rp_size > c->mtd->size) + return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB", + c->mtd->size / 1024); + /* Initialize JFFS2 superblock locks, the further initialization will * be done later */ mutex_init(&c->alloc_sem); diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 29891fad3f0956..aa03a904d5ab25 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -183,7 +183,7 @@ typedef union dmtree { #define dmt_leafidx t1.leafidx #define dmt_height t1.height #define dmt_budmin t1.budmin -#define dmt_stree t1.stree +#define dmt_stree t2.stree /* * on-disk aggregate disk allocation map descriptor. diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index f277d023ebcd14..c7571931214751 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "kernfs-internal.h" @@ -180,11 +181,10 @@ static const struct seq_operations kernfs_seq_ops = { * it difficult to use seq_file. Implement simplistic custom buffering for * bin files. 
*/ -static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, - char __user *user_buf, size_t count, - loff_t *ppos) +static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { - ssize_t len = min_t(size_t, count, PAGE_SIZE); + struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); + ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); const struct kernfs_ops *ops; char *buf; @@ -210,7 +210,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, of->event = atomic_read(&of->kn->attr.open->event); ops = kernfs_ops(of->kn); if (ops->read) - len = ops->read(of, buf, len, *ppos); + len = ops->read(of, buf, len, iocb->ki_pos); else len = -EINVAL; @@ -220,12 +220,12 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, if (len < 0) goto out_free; - if (copy_to_user(user_buf, buf, len)) { + if (copy_to_iter(buf, len, iter) != len) { len = -EFAULT; goto out_free; } - *ppos += len; + iocb->ki_pos += len; out_free: if (buf == of->prealloc_buf) @@ -235,31 +235,14 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, return len; } -/** - * kernfs_fop_read - kernfs vfs read callback - * @file: file pointer - * @user_buf: data to write - * @count: number of bytes - * @ppos: starting offset - */ -static ssize_t kernfs_fop_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) { - struct kernfs_open_file *of = kernfs_of(file); - - if (of->kn->flags & KERNFS_HAS_SEQ_SHOW) - return seq_read(file, user_buf, count, ppos); - else - return kernfs_file_direct_read(of, user_buf, count, ppos); + if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW) + return seq_read_iter(iocb, iter); + return kernfs_file_read_iter(iocb, iter); } -/** - * kernfs_fop_write - kernfs vfs write callback - * @file: file pointer - * @user_buf: data to write - * @count: number of bytes - * @ppos: starting offset 
- * +/* * Copy data in from userland and pass it to the matching kernfs write * operation. * @@ -269,20 +252,18 @@ static ssize_t kernfs_fop_read(struct file *file, char __user *user_buf, * modify only the the value you're changing, then write entire buffer * back. */ -static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) +static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) { - struct kernfs_open_file *of = kernfs_of(file); + struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); + ssize_t len = iov_iter_count(iter); const struct kernfs_ops *ops; - ssize_t len; char *buf; if (of->atomic_write_len) { - len = count; if (len > of->atomic_write_len) return -E2BIG; } else { - len = min_t(size_t, count, PAGE_SIZE); + len = min_t(size_t, len, PAGE_SIZE); } buf = of->prealloc_buf; @@ -293,7 +274,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, if (!buf) return -ENOMEM; - if (copy_from_user(buf, user_buf, len)) { + if (copy_from_iter(buf, len, iter) != len) { len = -EFAULT; goto out_free; } @@ -312,7 +293,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, ops = kernfs_ops(of->kn); if (ops->write) - len = ops->write(of, buf, len, *ppos); + len = ops->write(of, buf, len, iocb->ki_pos); else len = -EINVAL; @@ -320,7 +301,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, mutex_unlock(&of->mutex); if (len > 0) - *ppos += len; + iocb->ki_pos += len; out_free: if (buf == of->prealloc_buf) @@ -673,7 +654,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) /* * Write path needs to atomic_write_len outside active reference. - * Cache it in open_file. See kernfs_fop_write() for details. + * Cache it in open_file. See kernfs_fop_write_iter() for details. 
*/ of->atomic_write_len = ops->atomic_write_len; @@ -960,14 +941,16 @@ void kernfs_notify(struct kernfs_node *kn) EXPORT_SYMBOL_GPL(kernfs_notify); const struct file_operations kernfs_file_fops = { - .read = kernfs_fop_read, - .write = kernfs_fop_write, + .read_iter = kernfs_fop_read_iter, + .write_iter = kernfs_fop_write_iter, .llseek = generic_file_llseek, .mmap = kernfs_fop_mmap, .open = kernfs_fop_open, .release = kernfs_fop_release, .poll = kernfs_fop_poll, .fsync = noop_fsync, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, }; /** diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0afb6d59bad03b..771c289f6df7ff 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -439,12 +439,7 @@ nlm_bind_host(struct nlm_host *host) * RPC rebind is required */ if ((clnt = host->h_rpcclnt) != NULL) { - if (time_after_eq(jiffies, host->h_nextrebind)) { - rpc_force_rebind(clnt); - host->h_nextrebind = jiffies + NLM_HOST_REBIND; - dprintk("lockd: next rebind in %lu jiffies\n", - host->h_nextrebind - jiffies); - } + nlm_rebind_host(host); } else { unsigned long increment = nlmsvc_timeout; struct rpc_timeout timeparms = { @@ -494,13 +489,20 @@ nlm_bind_host(struct nlm_host *host) return clnt; } -/* - * Force a portmap lookup of the remote lockd port +/** + * nlm_rebind_host - If needed, force a portmap lookup of the peer's lockd port + * @host: NLM host handle for peer + * + * This is not needed when using a connection-oriented protocol, such as TCP. + * The existing autobind mechanism is sufficient to force a rebind when + * required, e.g. on connection state transitions. 
*/ void nlm_rebind_host(struct nlm_host *host) { - dprintk("lockd: rebind host %s\n", host->h_name); + if (host->h_proto != IPPROTO_UDP) + return; + if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) { rpc_force_rebind(host->h_rpcclnt); host->h_nextrebind = jiffies + NLM_HOST_REBIND; diff --git a/fs/namespace.c b/fs/namespace.c index cebaa3e8179406..c7fbb50a5aaa5d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -156,10 +156,10 @@ static inline void mnt_add_count(struct mount *mnt, int n) /* * vfsmount lock must be held for write */ -unsigned int mnt_get_count(struct mount *mnt) +int mnt_get_count(struct mount *mnt) { #ifdef CONFIG_SMP - unsigned int count = 0; + int count = 0; int cpu; for_each_possible_cpu(cpu) { @@ -1139,6 +1139,7 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { LIST_HEAD(list); + int count; rcu_read_lock(); if (likely(READ_ONCE(mnt->mnt_ns))) { @@ -1162,7 +1163,9 @@ static void mntput_no_expire(struct mount *mnt) */ smp_mb(); mnt_add_count(mnt, -1); - if (mnt_get_count(mnt)) { + count = mnt_get_count(mnt); + if (count != 0) { + WARN_ON(count < 0); rcu_read_unlock(); unlock_mount_hash(); return; @@ -1710,8 +1713,6 @@ static int can_umount(const struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); - if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) - return -EINVAL; if (!may_mount()) return -EPERM; if (path->dentry != path->mnt->mnt_root) @@ -1725,6 +1726,7 @@ static int can_umount(const struct path *path, int flags) return 0; } +// caller is responsible for flags being sane int path_umount(struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); @@ -1746,6 +1748,10 @@ static int ksys_umount(char __user *name, int flags) struct path path; int ret; + // basic validity checks done first + if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) + return -EINVAL; + if (!(flags & 
UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; ret = user_path_at(AT_FDCWD, name, lookup_flags, &path); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 816e1427f17eb1..04bf8066980c1d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1011,22 +1011,24 @@ nfs_delegation_find_inode_server(struct nfs_server *server, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; - struct inode *freeme, *res = NULL; + struct super_block *freeme = NULL; + struct inode *res = NULL; list_for_each_entry_rcu(delegation, &server->delegations, super_list) { spin_lock(&delegation->lock); if (delegation->inode != NULL && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { - freeme = igrab(delegation->inode); - if (freeme && nfs_sb_active(freeme->i_sb)) - res = freeme; + if (nfs_sb_active(server->super)) { + freeme = server->super; + res = igrab(delegation->inode); + } spin_unlock(&delegation->lock); if (res != NULL) return res; if (freeme) { rcu_read_unlock(); - iput(freeme); + nfs_sb_deactive(freeme); rcu_read_lock(); } return ERR_PTR(-EAGAIN); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 24bf5797f88ae2..fd0eda328943be 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1056,7 +1056,7 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) u32 idx = hdr->pgio_mirror_idx + 1; u32 new_idx = 0; - if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx)) + if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx)) ff_layout_send_layouterror(hdr->lseg); else pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index aa6493905bbe87..43af053f467a76 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2180,7 +2180,7 @@ static int nfsiod_start(void) { struct workqueue_struct *wq; dprintk("RPC: creating 
workqueue nfsiod\n"); - wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0); + wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (wq == NULL) return -ENOMEM; nfsiod_workqueue = wq; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6673a77884d9da..98554dd18a7157 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -142,9 +142,29 @@ struct nfs_fs_context { } clone_data; }; -#define nfs_errorf(fc, fmt, ...) errorf(fc, fmt, ## __VA_ARGS__) -#define nfs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) -#define nfs_warnf(fc, fmt, ...) warnf(fc, fmt, ## __VA_ARGS__) +#define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \ + errorf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); })) + +#define nfs_ferrorf(fc, fac, fmt, ...) ((fc)->log.log ? \ + errorf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) + +#define nfs_invalf(fc, fmt, ...) ((fc)->log.log ? \ + invalf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); -EINVAL; })) + +#define nfs_finvalf(fc, fac, fmt, ...) ((fc)->log.log ? \ + invalf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); -EINVAL; })) + +#define nfs_warnf(fc, fmt, ...) ((fc)->log.log ? \ + warnf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); })) + +#define nfs_fwarnf(fc, fac, fmt, ...) ((fc)->log.log ? 
\ + warnf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) { @@ -585,12 +605,14 @@ extern void nfs4_test_session_trunk(struct rpc_clnt *clnt, static inline struct inode *nfs_igrab_and_active(struct inode *inode) { - inode = igrab(inode); - if (inode != NULL && !nfs_sb_active(inode->i_sb)) { - iput(inode); - inode = NULL; + struct super_block *sb = inode->i_sb; + + if (sb && nfs_sb_active(sb)) { + if (igrab(inode)) + return inode; + nfs_sb_deactive(sb); } - return inode; + return NULL; } static inline void nfs_iput_and_deactive(struct inode *inode) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 8432bd6b95f08d..c078f88552695d 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1019,29 +1019,24 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re return decode_op_hdr(xdr, OP_DEALLOCATE); } -static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res, - uint32_t *eof) +static int decode_read_plus_data(struct xdr_stream *xdr, + struct nfs_pgio_res *res) { uint32_t count, recvd; uint64_t offset; __be32 *p; p = xdr_inline_decode(xdr, 8 + 4); - if (unlikely(!p)) - return -EIO; + if (!p) + return 1; p = xdr_decode_hyper(p, &offset); count = be32_to_cpup(p); recvd = xdr_align_data(xdr, res->count, count); res->count += recvd; - if (count > recvd) { - dprintk("NFS: server cheating in read reply: " - "count %u > recvd %u\n", count, recvd); - *eof = 0; + if (count > recvd) return 1; - } - return 0; } @@ -1052,18 +1047,16 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *re __be32 *p; p = xdr_inline_decode(xdr, 8 + 8); - if (unlikely(!p)) - return -EIO; + if (!p) + return 1; p = xdr_decode_hyper(p, &offset); p = xdr_decode_hyper(p, &length); recvd = xdr_expand_hole(xdr, res->count, length); res->count += recvd; - if (recvd < length) { - *eof = 0; + if (recvd < length) 
return 1; - } return 0; } @@ -1088,12 +1081,12 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) for (i = 0; i < segments; i++) { p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; + if (!p) + goto early_out; type = be32_to_cpup(p++); if (type == NFS4_CONTENT_DATA) - status = decode_read_plus_data(xdr, res, &eof); + status = decode_read_plus_data(xdr, res); else if (type == NFS4_CONTENT_HOLE) status = decode_read_plus_hole(xdr, res, &eof); else @@ -1102,12 +1095,17 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) if (status < 0) return status; if (status > 0) - break; + goto early_out; } out: res->eof = eof; return 0; +early_out: + if (unlikely(!i)) + return -EIO; + res->eof = 0; + return 0; } static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e89468678ae161..0cd5b127f3bb94 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3534,10 +3534,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); /* Handle Layoutreturn errors */ - if (pnfs_roc_done(task, calldata->inode, - &calldata->arg.lr_args, - &calldata->res.lr_res, - &calldata->res.lr_ret) == -EAGAIN) + if (pnfs_roc_done(task, &calldata->arg.lr_args, &calldata->res.lr_res, + &calldata->res.lr_ret) == -EAGAIN) goto out_restart; /* hmm. 
we are done with the inode, and in the process of freeing @@ -4961,12 +4959,12 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, u64 cookie, struct page **pages, unsigned int count, bool plus) { struct inode *dir = d_inode(dentry); + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_readdir_arg args = { .fh = NFS_FH(dir), .pages = pages, .pgbase = 0, .count = count, - .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask, .plus = plus, }; struct nfs4_readdir_res res; @@ -4981,9 +4979,15 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__, dentry, (unsigned long long)cookie); + if (!(server->caps & NFS_CAP_SECURITY_LABEL)) + args.bitmask = server->attr_bitmask_nl; + else + args.bitmask = server->attr_bitmask; + nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; - status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, + &res.seq_res, 0); if (status >= 0) { memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; @@ -6373,10 +6377,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); /* Handle Layoutreturn errors */ - if (pnfs_roc_done(task, data->inode, - &data->args.lr_args, - &data->res.lr_res, - &data->res.lr_ret) == -EAGAIN) + if (pnfs_roc_done(task, &data->args.lr_args, &data->res.lr_res, + &data->res.lr_ret) == -EAGAIN) goto out_restart; switch (task->tk_status) { @@ -6430,10 +6432,10 @@ static void nfs4_delegreturn_release(void *calldata) struct nfs4_delegreturndata *data = calldata; struct inode *inode = data->inode; + if (data->lr.roc) + pnfs_roc_release(&data->lr.arg, &data->lr.res, + data->res.lr_ret); if (inode) { - if (data->lr.roc) - 
pnfs_roc_release(&data->lr.arg, &data->lr.res, - data->res.lr_ret); nfs_post_op_update_inode_force_wcc(inode, &data->fattr); nfs_iput_and_deactive(inode); } @@ -6509,16 +6511,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; - data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, cred); data->inode = nfs_igrab_and_active(inode); - if (data->inode) { + if (data->inode || issync) { + data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, + cred); if (data->lr.roc) { data->args.lr_args = &data->lr.arg; data->res.lr_res = &data->lr.res; } - } else if (data->lr.roc) { - pnfs_roc_release(&data->lr.arg, &data->lr.res, 0); - data->lr.roc = false; } task_setup_data.callback_data = data; @@ -7100,9 +7100,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.new_lock_owner, ret); } else data->cancelled = true; + trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); rpc_put_task(task); dprintk("%s: done, ret = %d!\n", __func__, ret); - trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); return ret; } diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 93f5c1678ec291..d09bcfd7db8948 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -67,7 +67,7 @@ static void nfs4_evict_inode(struct inode *inode) nfs_inode_evict_delegation(inode); /* Note that above delegreturn would trigger pnfs return-on-close */ pnfs_return_layout(inode); - pnfs_destroy_layout(NFS_I(inode)); + pnfs_destroy_layout_final(NFS_I(inode)); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); nfs4_xattr_cache_zap(inode); @@ -227,7 +227,7 @@ int nfs4_try_get_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { - nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- 
nfs4_try_get_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n"); @@ -250,7 +250,7 @@ int nfs4_get_referral_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { - nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n"); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c6dbfcae75171e..c16b93df1bc142 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3009,15 +3009,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; + uint32_t replen; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); + + replen = hdr.replen + op_decode_hdr_maxsz; + encode_getdeviceinfo(xdr, args, &hdr); - /* set up reply kvec. Subtract notification bitmap max size (2) - * so that notification bitmap is put in xdr_buf tail */ + /* set up reply kvec. 
device_addr4 opaque data is read into the + * pages */ rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase, - args->pdev->pglen, hdr.replen - 2); + args->pdev->pglen, replen + 2 + 1); encode_nops(&hdr); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0e50b9d45c3209..cbadcf6ca4da2a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -294,6 +294,7 @@ void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) { struct inode *inode; + unsigned long i_state; if (!lo) return; @@ -304,8 +305,12 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) if (!list_empty(&lo->plh_segs)) WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); + i_state = inode->i_state; spin_unlock(&inode->i_lock); pnfs_free_layout_hdr(lo); + /* Notify pnfs_destroy_layout_final() that we're done */ + if (i_state & (I_FREEING | I_CLEAR)) + wake_up_var(lo); } } @@ -319,6 +324,21 @@ pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo) return NULL; } +/* + * Compare 2 layout stateid sequence ids, to see which is newer, + * taking into account wraparound issues. + */ +static bool pnfs_seqid_is_newer(u32 s1, u32 s2) +{ + return (s32)(s1 - s2) > 0; +} + +static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq) +{ + if (pnfs_seqid_is_newer(newseq, lo->plh_barrier)) + lo->plh_barrier = newseq; +} + static void pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, u32 seq) @@ -330,6 +350,7 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, if (seq != 0) { WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); lo->plh_return_seq = seq; + pnfs_barrier_update(lo, seq); } } @@ -634,15 +655,6 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, return rv; } -/* - * Compare 2 layout stateid sequence ids, to see which is newer, - * taking into account wraparound issues. 
- */ -static bool pnfs_seqid_is_newer(u32 s1, u32 s2) -{ - return (s32)(s1 - s2) > 0; -} - static bool pnfs_should_free_range(const struct pnfs_layout_range *lseg_range, const struct pnfs_layout_range *recall_range) @@ -734,8 +746,7 @@ pnfs_free_lseg_list(struct list_head *free_me) } } -void -pnfs_destroy_layout(struct nfs_inode *nfsi) +static struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi) { struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); @@ -753,9 +764,34 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) pnfs_put_layout_hdr(lo); } else spin_unlock(&nfsi->vfs_inode.i_lock); + return lo; +} + +void pnfs_destroy_layout(struct nfs_inode *nfsi) +{ + __pnfs_destroy_layout(nfsi); } EXPORT_SYMBOL_GPL(pnfs_destroy_layout); +static bool pnfs_layout_removed(struct nfs_inode *nfsi, + struct pnfs_layout_hdr *lo) +{ + bool ret; + + spin_lock(&nfsi->vfs_inode.i_lock); + ret = nfsi->layout != lo; + spin_unlock(&nfsi->vfs_inode.i_lock); + return ret; +} + +void pnfs_destroy_layout_final(struct nfs_inode *nfsi) +{ + struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi); + + if (lo) + wait_var_event(lo, pnfs_layout_removed(nfsi, lo)); +} + static bool pnfs_layout_add_bulk_destroy_list(struct inode *inode, struct list_head *layout_list) @@ -955,8 +991,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, new_barrier = be32_to_cpu(new->seqid); else if (new_barrier == 0) return; - if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) - lo->plh_barrier = new_barrier; + pnfs_barrier_update(lo, new_barrier); } static bool @@ -1154,20 +1189,17 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, return false; set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); pnfs_get_layout_hdr(lo); + nfs4_stateid_copy(stateid, &lo->plh_stateid); + *cred = get_cred(lo->plh_lc_cred); if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { - nfs4_stateid_copy(stateid, &lo->plh_stateid); - *cred = get_cred(lo->plh_lc_cred); if (lo->plh_return_seq != 0) 
stateid->seqid = cpu_to_be32(lo->plh_return_seq); if (iomode != NULL) *iomode = lo->plh_return_iomode; pnfs_clear_layoutreturn_info(lo); - return true; - } - nfs4_stateid_copy(stateid, &lo->plh_stateid); - *cred = get_cred(lo->plh_lc_cred); - if (iomode != NULL) + } else if (iomode != NULL) *iomode = IOMODE_ANY; + pnfs_barrier_update(lo, be32_to_cpu(stateid->seqid)); return true; } @@ -1480,10 +1512,8 @@ bool pnfs_roc(struct inode *ino, return false; } -int pnfs_roc_done(struct rpc_task *task, struct inode *inode, - struct nfs4_layoutreturn_args **argpp, - struct nfs4_layoutreturn_res **respp, - int *ret) +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, + struct nfs4_layoutreturn_res **respp, int *ret) { struct nfs4_layoutreturn_args *arg = *argpp; int retval = -EAGAIN; @@ -1516,7 +1546,7 @@ int pnfs_roc_done(struct rpc_task *task, struct inode *inode, return 0; case -NFS4ERR_OLD_STATEID: if (!nfs4_layout_refresh_old_stateid(&arg->stateid, - &arg->range, inode)) + &arg->range, arg->inode)) break; *ret = -NFS4ERR_NOMATCHING_LAYOUT; return -EAGAIN; @@ -1531,12 +1561,18 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, int ret) { struct pnfs_layout_hdr *lo = args->layout; + struct inode *inode = args->inode; const nfs4_stateid *arg_stateid = NULL; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; switch (ret) { case -NFS4ERR_NOMATCHING_LAYOUT: + spin_lock(&inode->i_lock); + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) + pnfs_set_plh_return_info(lo, args->range.iomode, 0); + spin_unlock(&inode->i_lock); break; case 0: if (res->lrs_present) @@ -1986,6 +2022,27 @@ pnfs_update_layout(struct inode *ino, goto lookup_again; } + /* + * Because we free lsegs when sending LAYOUTRETURN, we need to wait + * for LAYOUTRETURN. 
+ */ + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + dprintk("%s wait for layoutreturn\n", __func__); + lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); + if (!IS_ERR(lseg)) { + pnfs_put_layout_hdr(lo); + dprintk("%s retrying\n", __func__); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, + lseg, + PNFS_UPDATE_LAYOUT_RETRY); + goto lookup_again; + } + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_RETURN); + goto out_put_layout_hdr; + } + lseg = pnfs_find_lseg(lo, &arg, strict_iomode); if (lseg) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, @@ -2038,28 +2095,6 @@ pnfs_update_layout(struct inode *ino, nfs4_stateid_copy(&stateid, &lo->plh_stateid); } - /* - * Because we free lsegs before sending LAYOUTRETURN, we need to wait - * for LAYOUTRETURN even if first is true. - */ - if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { - spin_unlock(&ino->i_lock); - dprintk("%s wait for layoutreturn\n", __func__); - lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); - if (!IS_ERR(lseg)) { - if (first) - pnfs_clear_first_layoutget(lo); - pnfs_put_layout_hdr(lo); - dprintk("%s retrying\n", __func__); - trace_pnfs_update_layout(ino, pos, count, iomode, lo, - lseg, PNFS_UPDATE_LAYOUT_RETRY); - goto lookup_again; - } - trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, - PNFS_UPDATE_LAYOUT_RETURN); - goto out_put_layout_hdr; - } - if (pnfs_layoutgets_blocked(lo)) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_BLOCKED); @@ -2213,6 +2248,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data, &rng, GFP_KERNEL); if (!lgp) { pnfs_clear_first_layoutget(lo); + nfs_layoutget_end(lo); pnfs_put_layout_hdr(lo); return; } @@ -2385,6 +2421,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) spin_unlock(&ino->i_lock); lseg->pls_layout = lo; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + pnfs_free_lseg_list(&free_me); return 
ERR_PTR(-EAGAIN); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2661c44c62db40..132a345e937311 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -266,6 +266,7 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_layoutget_free(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); +void pnfs_destroy_layout_final(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid, @@ -294,10 +295,8 @@ bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, const struct cred *cred); -int pnfs_roc_done(struct rpc_task *task, struct inode *inode, - struct nfs4_layoutreturn_args **argpp, - struct nfs4_layoutreturn_res **respp, - int *ret); +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, + struct nfs4_layoutreturn_res **respp, int *ret); void pnfs_roc_release(struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, int ret); @@ -710,6 +709,10 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) { } +static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi) +{ +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { @@ -765,7 +768,7 @@ pnfs_roc(struct inode *ino, } static inline int -pnfs_roc_done(struct rpc_task *task, struct inode *inode, +pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, struct nfs4_layoutreturn_res **respp, int *ret) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 679767ac258d0c..e3b25822e0bb19 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -78,22 +78,18 @@ void pnfs_generic_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { - struct pnfs_layout_segment *freeme = NULL; + struct pnfs_commit_bucket *bucket = NULL; if 
(!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; cinfo->ds->nwritten--; - if (list_is_singular(&req->wb_list)) { - struct pnfs_commit_bucket *bucket; - + if (list_is_singular(&req->wb_list)) bucket = list_first_entry(&req->wb_list, - struct pnfs_commit_bucket, - written); - freeme = pnfs_free_bucket_lseg(bucket); - } + struct pnfs_commit_bucket, written); out: nfs_request_remove_commit_list(req, cinfo); - pnfs_put_lseg(freeme); + if (bucket) + pnfs_put_lseg(pnfs_free_bucket_lseg(bucket)); } EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); @@ -407,12 +403,16 @@ pnfs_bucket_get_committing(struct list_head *head, struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo) { + struct pnfs_layout_segment *lseg; struct list_head *pos; list_for_each(pos, &bucket->committing) cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, head); - return pnfs_free_bucket_lseg(bucket); + lseg = pnfs_free_bucket_lseg(bucket); + if (!lseg) + lseg = pnfs_get_lseg(bucket->lseg); + return lseg; } static struct nfs_commit_data * @@ -424,8 +424,6 @@ pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, if (!data) return NULL; data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo); - if (!data->lseg) - data->lseg = pnfs_get_lseg(bucket->lseg); return data; } diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index b73d9dd37f73cc..26f2a50eceac9d 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -69,10 +69,14 @@ __state_in_grace(struct net *net, bool open) if (!open) return !list_empty(grace_list); + spin_lock(&grace_lock); list_for_each_entry(lm, grace_list, list) { - if (lm->block_opens) + if (lm->block_opens) { + spin_unlock(&grace_lock); return true; + } } + spin_unlock(&grace_lock); return false; } diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 3c6c2f7d1688b4..5849c1bd88f17a 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -600,7 +600,7 @@ static struct notifier_block 
nfsd_file_lease_notifier = { static int nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *name) + const struct qstr *name, u32 cookie) { trace_nfsd_file_fsnotify_handle_event(inode, mask); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 2277f83da25012..716566da400e1b 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -863,9 +863,14 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, if (isdotent(name, namlen)) { if (namlen == 2) { dchild = dget_parent(dparent); - /* filesystem root - cannot return filehandle for ".." */ + /* + * Don't return filehandle for ".." if we're at + * the filesystem or export root: + */ if (dchild == dparent) goto out; + if (dparent == exp->ex_path.dentry) + goto out; } else dchild = dget(dparent); } else diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d7f27ed6b79410..47006eec724e61 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -769,6 +769,7 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, spin_lock(&nn->s2s_cp_lock); new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); stid->stid.si_opaque.so_id = new_id; + stid->stid.si_generation = 1; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 833a2c64dfe808..5f5169b9c2e904 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4632,6 +4632,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); if (nfserr) return nfserr; + xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); tmp = htonl(NFS4_CONTENT_DATA); write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); @@ -4639,6 +4640,10 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); tmp = htonl(*maxcount); 
write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); + + tmp = xdr_zero; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, + xdr_pad_size(*maxcount)); return nfs_ok; } @@ -4731,14 +4736,15 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr && segments == 0) xdr_truncate_encode(xdr, starting_len); else { - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp = htonl(segments); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); if (nfserr) { xdr_truncate_encode(xdr, last_segment); nfserr = nfs_ok; + eof = 0; } + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); + tmp = htonl(segments); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); } return nfserr; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 27b1ad1361508b..9323e30a7eafeb 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -527,8 +527,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) return; nfsd_shutdown_net(net); - printk(KERN_WARNING "nfsd: last server has exited, flushing export " - "cache\n"); + pr_info("nfsd: last server has exited, flushing export cache\n"); nfsd_export_flush(net); } diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 5dcda8f20c04f9..e45ca6ecba9591 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -72,7 +72,7 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) */ static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *name) + const struct qstr *name, u32 cookie) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 9167884a61eca9..1192c995362007 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -268,12 +268,11 @@ static u32 
fanotify_group_event_mask(struct fsnotify_group *group, continue; /* - * If the event is for a child and this mark is on a parent not + * If the event is on a child and this mark is on a parent not * watching children, don't send it! */ - if (event_mask & FS_EVENT_ON_CHILD && - type == FSNOTIFY_OBJ_TYPE_INODE && - !(mark->mask & FS_EVENT_ON_CHILD)) + if (type == FSNOTIFY_OBJ_TYPE_PARENT && + !(mark->mask & FS_EVENT_ON_CHILD)) continue; marks_mask |= mark->mask; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3e01d8f2ab9061..dcab112e1f0012 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1285,26 +1285,23 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return ret; } +#ifndef CONFIG_ARCH_SPLIT_ARG64 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, __u64, mask, int, dfd, const char __user *, pathname) { return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); } +#endif -#ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE6(fanotify_mark, +#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT) +SYSCALL32_DEFINE6(fanotify_mark, int, fanotify_fd, unsigned int, flags, - __u32, mask0, __u32, mask1, int, dfd, + SC_ARG64(mask), int, dfd, const char __user *, pathname) { - return do_fanotify_mark(fanotify_fd, flags, -#ifdef __BIG_ENDIAN - ((__u64)mask0 << 32) | mask1, -#else - ((__u64)mask1 << 32) | mask0, -#endif - dfd, pathname); + return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask), + dfd, pathname); } #endif diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 8d3ad5ef292587..30d422b8c0fc7e 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -152,6 +152,13 @@ static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt, if (mask & FS_ISDIR) return false; + /* + * All events that are possible on child can also may be reported with + * parent/name info to inode/sb/mount. 
Otherwise, a watching parent + * could result in events reported with unexpected name info to sb/mount. + */ + BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT); + /* Did either inode/sb/mount subscribe for events with parent/name? */ marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask); marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask); @@ -232,47 +239,76 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, } EXPORT_SYMBOL_GPL(__fsnotify_parent); +static int fsnotify_handle_inode_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + u32 mask, const void *data, int data_type, + struct inode *dir, const struct qstr *name, + u32 cookie) +{ + const struct path *path = fsnotify_data_path(data, data_type); + struct inode *inode = fsnotify_data_inode(data, data_type); + const struct fsnotify_ops *ops = group->ops; + + if (WARN_ON_ONCE(!ops->handle_inode_event)) + return 0; + + if ((inode_mark->mask & FS_EXCL_UNLINK) && + path && d_unlinked(path->dentry)) + return 0; + + /* Check interest of this mark in case event was sent with two marks */ + if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS)) + return 0; + + return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie); +} + static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); - struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info); - struct inode *inode = fsnotify_data_inode(data, data_type); - const struct fsnotify_ops *ops = group->ops; + struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info); int ret; - if (WARN_ON_ONCE(!ops->handle_inode_event)) - return 0; - if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) || 
WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info))) return 0; - /* - * An event can be sent on child mark iterator instead of inode mark - * iterator because of other groups that have interest of this inode - * and have marks on both parent and child. We can simplify this case. - */ - if (!inode_mark) { - inode_mark = child_mark; - child_mark = NULL; + if (parent_mark) { + /* + * parent_mark indicates that the parent inode is watching + * children and interested in this event, which is an event + * possible on child. But is *this mark* watching children and + * interested in this event? + */ + if (parent_mark->mask & FS_EVENT_ON_CHILD) { + ret = fsnotify_handle_inode_event(group, parent_mark, mask, + data, data_type, dir, name, 0); + if (ret) + return ret; + } + if (!inode_mark) + return 0; + } + + if (mask & FS_EVENT_ON_CHILD) { + /* + * Some events can be sent on both parent dir and child marks + * (e.g. FS_ATTRIB). If both parent dir and child are + * watching, report the event once to parent dir with name (if + * interested) and once to child without name (if interested). + * The child watcher is expecting an event without a file name + * and without the FS_EVENT_ON_CHILD flag. + */ + mask &= ~FS_EVENT_ON_CHILD; dir = NULL; name = NULL; } - ret = ops->handle_inode_event(inode_mark, mask, inode, dir, name); - if (ret || !child_mark) - return ret; - - /* - * Some events can be sent on both parent dir and child marks - * (e.g. FS_ATTRIB). If both parent dir and child are watching, - * report the event once to parent dir with name and once to child - * without name. 
- */ - return ops->handle_inode_event(child_mark, mask, inode, NULL, NULL); + return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type, + dir, name, cookie); } static int send_to_group(__u32 mask, const void *data, int data_type, @@ -430,7 +466,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, struct fsnotify_iter_info iter_info = {}; struct super_block *sb; struct mount *mnt = NULL; - struct inode *child = NULL; + struct inode *parent = NULL; int ret = 0; __u32 test_mask, marks_mask; @@ -442,11 +478,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, inode = dir; } else if (mask & FS_EVENT_ON_CHILD) { /* - * Event on child - report on TYPE_INODE to dir if it is - * watching children and on TYPE_CHILD to child. + * Event on child - report on TYPE_PARENT to dir if it is + * watching children and on TYPE_INODE to child. */ - child = inode; - inode = dir; + parent = dir; } sb = inode->i_sb; @@ -460,7 +495,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, if (!sb->s_fsnotify_marks && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && - (!child || !child->i_fsnotify_marks)) + (!parent || !parent->i_fsnotify_marks)) return 0; marks_mask = sb->s_fsnotify_mask; @@ -468,8 +503,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, marks_mask |= mnt->mnt_fsnotify_mask; if (inode) marks_mask |= inode->i_fsnotify_mask; - if (child) - marks_mask |= child->i_fsnotify_mask; + if (parent) + marks_mask |= parent->i_fsnotify_mask; /* @@ -492,9 +527,9 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&inode->i_fsnotify_marks); } - if (child) { - iter_info.marks[FSNOTIFY_OBJ_TYPE_CHILD] = - fsnotify_first_mark(&child->i_fsnotify_marks); + if (parent) { + iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] = + 
fsnotify_first_mark(&parent->i_fsnotify_marks); } /* diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 4327d0e9c36459..2007e371191600 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -24,11 +24,10 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group); -extern int inotify_handle_event(struct fsnotify_group *group, u32 mask, - const void *data, int data_type, - struct inode *dir, - const struct qstr *file_name, u32 cookie, - struct fsnotify_iter_info *iter_info); +extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, + u32 mask, struct inode *inode, + struct inode *dir, + const struct qstr *name, u32 cookie); extern const struct fsnotify_ops inotify_fsnotify_ops; extern struct kmem_cache *inotify_inode_mark_cachep; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 9ddcbadc98e29e..1901d799909b89 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -55,25 +55,21 @@ static int inotify_merge(struct list_head *list, return event_compare(last_event, event); } -static int inotify_one_event(struct fsnotify_group *group, u32 mask, - struct fsnotify_mark *inode_mark, - const struct path *path, - const struct qstr *file_name, u32 cookie) +int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, + struct inode *inode, struct inode *dir, + const struct qstr *name, u32 cookie) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; struct fsnotify_event *fsn_event; + struct fsnotify_group *group = inode_mark->group; int ret; int len = 0; int alloc_len = sizeof(struct inotify_event_info); struct mem_cgroup *old_memcg; - if ((inode_mark->mask & FS_EXCL_UNLINK) && - path && d_unlinked(path->dentry)) - return 0; - - if (file_name) { - len = file_name->len; + if 
(name) { + len = name->len; alloc_len += len + 1; } @@ -117,7 +113,7 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask, event->sync_cookie = cookie; event->name_len = len; if (len) - strcpy(event->name, file_name->name); + strcpy(event->name, name->name); ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { @@ -131,37 +127,6 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask, return 0; } -int inotify_handle_event(struct fsnotify_group *group, u32 mask, - const void *data, int data_type, struct inode *dir, - const struct qstr *file_name, u32 cookie, - struct fsnotify_iter_info *iter_info) -{ - const struct path *path = fsnotify_data_path(data, data_type); - struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); - struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info); - int ret = 0; - - if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info))) - return 0; - - /* - * Some events cannot be sent on both parent and child marks - * (e.g. IN_CREATE). Those events are always sent on inode_mark. - * For events that are possible on both parent and child (e.g. IN_OPEN), - * event is sent on inode_mark with name if the parent is watching and - * is sent on child_mark without name if child is watching. - * If both parent and child are watching, report the event with child's - * name here and report another event without child's name below. 
- */ - if (inode_mark) - ret = inotify_one_event(group, mask, inode_mark, path, - file_name, cookie); - if (ret || !child_mark) - return ret; - - return inotify_one_event(group, mask, child_mark, path, NULL, 0); -} - static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { inotify_ignored_and_remove_idr(fsn_mark, group); @@ -227,7 +192,7 @@ static void inotify_free_mark(struct fsnotify_mark *fsn_mark) } const struct fsnotify_ops inotify_fsnotify_ops = { - .handle_event = inotify_handle_event, + .handle_inode_event = inotify_handle_inode_event, .free_group_priv = inotify_free_group_priv, .free_event = inotify_free_event, .freeing_mark = inotify_freeing_mark, diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 186722ba389476..5f6c6bf65909cd 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -486,14 +486,10 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { struct inotify_inode_mark *i_mark; - struct fsnotify_iter_info iter_info = { }; - - fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE, - fsn_mark); /* Queue ignore event for the watch */ - inotify_handle_event(group, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, - NULL, NULL, 0, &iter_info); + inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL, + 0); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ diff --git a/fs/open.c b/fs/open.c index 9af548fb841b00..4d7537ae59df50 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1010,6 +1010,10 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) if (how->resolve & ~VALID_RESOLVE_FLAGS) return -EINVAL; + /* Scoping flags are mutually exclusive. */ + if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT)) + return -EINVAL; + /* Deal with the mode. 
*/ if (WILL_CREATE(flags)) { if (how->mode & ~S_IALLUGO) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index efccb7c1f9bc53..a1f72ac053e5f4 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -541,46 +541,31 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct fd real; - const struct cred *old_cred; long ret; ret = ovl_real_fdget(file, &real); if (ret) return ret; - old_cred = ovl_override_creds(file_inode(file)->i_sb); ret = security_file_ioctl(real.file, cmd, arg); - if (!ret) + if (!ret) { + /* + * Don't override creds, since we currently can't safely check + * permissions before doing so. + */ ret = vfs_ioctl(real.file, cmd, arg); - revert_creds(old_cred); + } fdput(real); return ret; } -static unsigned int ovl_iflags_to_fsflags(unsigned int iflags) -{ - unsigned int flags = 0; - - if (iflags & S_SYNC) - flags |= FS_SYNC_FL; - if (iflags & S_APPEND) - flags |= FS_APPEND_FL; - if (iflags & S_IMMUTABLE) - flags |= FS_IMMUTABLE_FL; - if (iflags & S_NOATIME) - flags |= FS_NOATIME_FL; - - return flags; -} - static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd, - unsigned long arg, unsigned int flags) + unsigned long arg) { long ret; struct inode *inode = file_inode(file); - unsigned int oldflags; if (!inode_owner_or_capable(inode)) return -EACCES; @@ -591,10 +576,13 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd, inode_lock(inode); - /* Check the capability before cred override */ - oldflags = ovl_iflags_to_fsflags(READ_ONCE(inode->i_flags)); - ret = vfs_ioc_setflags_prepare(inode, oldflags, flags); - if (ret) + /* + * Prevent copy up if immutable and has no CAP_LINUX_IMMUTABLE + * capability. 
+ */ + ret = -EPERM; + if (!ovl_has_upperdata(inode) && IS_IMMUTABLE(inode) && + !capable(CAP_LINUX_IMMUTABLE)) goto unlock; ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY); @@ -613,46 +601,6 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd, } -static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd, - unsigned long arg) -{ - unsigned int flags; - - if (get_user(flags, (int __user *) arg)) - return -EFAULT; - - return ovl_ioctl_set_flags(file, cmd, arg, flags); -} - -static unsigned int ovl_fsxflags_to_fsflags(unsigned int xflags) -{ - unsigned int flags = 0; - - if (xflags & FS_XFLAG_SYNC) - flags |= FS_SYNC_FL; - if (xflags & FS_XFLAG_APPEND) - flags |= FS_APPEND_FL; - if (xflags & FS_XFLAG_IMMUTABLE) - flags |= FS_IMMUTABLE_FL; - if (xflags & FS_XFLAG_NOATIME) - flags |= FS_NOATIME_FL; - - return flags; -} - -static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct fsxattr fa; - - memset(&fa, 0, sizeof(fa)); - if (copy_from_user(&fa, (void __user *) arg, sizeof(fa))) - return -EFAULT; - - return ovl_ioctl_set_flags(file, cmd, arg, - ovl_fsxflags_to_fsflags(fa.fsx_xflags)); -} - long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long ret; @@ -663,12 +611,9 @@ long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ret = ovl_real_ioctl(file, cmd, arg); break; - case FS_IOC_SETFLAGS: - ret = ovl_ioctl_set_fsflags(file, cmd, arg); - break; - case FS_IOC_FSSETXATTR: - ret = ovl_ioctl_set_fsxflags(file, cmd, arg); + case FS_IOC_SETFLAGS: + ret = ovl_ioctl_set_flags(file, cmd, arg); break; default: diff --git a/fs/pipe.c b/fs/pipe.c index 0ac197658a2d6e..412b3b618994c9 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1206,6 +1206,7 @@ const struct file_operations pipefifo_fops = { .unlocked_ioctl = pipe_ioctl, .release = pipe_release, .fasync = pipe_fasync, + .splice_write = iter_file_splice_write, }; /* diff --git a/fs/pnode.h b/fs/pnode.h index 
49a058c73e4c77..26f74e092bd981 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -44,7 +44,7 @@ int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); -unsigned int mnt_get_count(struct mount *mnt); +int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, diff --git a/fs/proc/base.c b/fs/proc/base.c index b362523a9829ac..55ce0ee9c5c734 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -405,11 +405,11 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, static int lock_trace(struct task_struct *task) { - int err = mutex_lock_killable(&task->signal->exec_update_mutex); + int err = down_read_killable(&task->signal->exec_update_lock); if (err) return err; if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { - mutex_unlock(&task->signal->exec_update_mutex); + up_read(&task->signal->exec_update_lock); return -EPERM; } return 0; @@ -417,7 +417,7 @@ static int lock_trace(struct task_struct *task) static void unlock_trace(struct task_struct *task) { - mutex_unlock(&task->signal->exec_update_mutex); + up_read(&task->signal->exec_update_lock); } #ifdef CONFIG_STACKTRACE @@ -2930,7 +2930,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh unsigned long flags; int result; - result = mutex_lock_killable(&task->signal->exec_update_mutex); + result = down_read_killable(&task->signal->exec_update_lock); if (result) return result; @@ -2966,7 +2966,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh result = 0; out_unlock: - mutex_unlock(&task->signal->exec_update_mutex); + up_read(&task->signal->exec_update_lock); return result; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index b84663252adda0..6c0a05f55d6b12 
100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -349,6 +349,16 @@ static const struct file_operations proc_dir_operations = { .iterate_shared = proc_readdir, }; +static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 0; +} + +const struct dentry_operations proc_net_dentry_ops = { + .d_revalidate = proc_net_d_revalidate, + .d_delete = always_delete_dentry, +}; + /* * proc directories can do almost nothing.. */ @@ -471,8 +481,8 @@ struct proc_dir_entry *proc_symlink(const char *name, } EXPORT_SYMBOL(proc_symlink); -struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, - struct proc_dir_entry *parent, void *data) +struct proc_dir_entry *_proc_mkdir(const char *name, umode_t mode, + struct proc_dir_entry *parent, void *data, bool force_lookup) { struct proc_dir_entry *ent; @@ -484,10 +494,20 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, ent->data = data; ent->proc_dir_ops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations; + if (force_lookup) { + pde_force_lookup(ent); + } ent = proc_register(parent, ent); } return ent; } +EXPORT_SYMBOL_GPL(_proc_mkdir); + +struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, void *data) +{ + return _proc_mkdir(name, mode, parent, data, false); +} EXPORT_SYMBOL_GPL(proc_mkdir_data); struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 917cc85e346630..afbe96b6bf77de 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -310,3 +310,10 @@ extern unsigned long task_statm(struct mm_struct *, unsigned long *, unsigned long *, unsigned long *, unsigned long *); extern void task_mem(struct seq_file *, struct mm_struct *); + +extern const struct dentry_operations proc_net_dentry_ops; +static inline void pde_force_lookup(struct proc_dir_entry *pde) +{ + /* /proc/net/ entries can be changed under us by 
setns(CLONE_NEWNET) */ + pde->proc_dops = &proc_net_dentry_ops; +} diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index ed8a6306990c43..1aa9236bf1af58 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -39,22 +39,6 @@ static struct net *get_proc_net(const struct inode *inode) return maybe_get_net(PDE_NET(PDE(inode))); } -static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags) -{ - return 0; -} - -static const struct dentry_operations proc_net_dentry_ops = { - .d_revalidate = proc_net_d_revalidate, - .d_delete = always_delete_dentry, -}; - -static void pde_force_lookup(struct proc_dir_entry *pde) -{ - /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ - pde->proc_dops = &proc_net_dentry_ops; -} - static int seq_open_net(struct inode *inode, struct file *file) { unsigned int state_size = PDE(inode)->state_size; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 317899222d7fdf..d2018f70d1fae2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1770,6 +1770,12 @@ static int process_sysctl_arg(char *param, char *val, return 0; } + if (!val) + return -EINVAL; + len = strlen(val); + if (len == 0) + return -EINVAL; + /* * To set sysctl options, we use a temporary mount of proc, look up the * respective sys/ file and write to it. 
To avoid mounting it when no @@ -1811,7 +1817,6 @@ static int process_sysctl_arg(char *param, char *val, file, param, val); goto out; } - len = strlen(val); wret = kernel_write(file, val, len, &pos); if (wret < 0) { err = wret; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ee5a235b305627..602e3a52884d88 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1035,6 +1035,25 @@ struct clear_refs_private { }; #ifdef CONFIG_MEM_SOFT_DIRTY + +#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) + +static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) +{ + struct page *page; + + if (!pte_write(pte)) + return false; + if (!is_cow_mapping(vma->vm_flags)) + return false; + if (likely(!atomic_read(&vma->vm_mm->has_pinned))) + return false; + page = vm_normal_page(vma, addr, pte); + if (!page) + return false; + return page_maybe_dma_pinned(page); +} + static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { @@ -1049,6 +1068,8 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, if (pte_present(ptent)) { pte_t old_pte; + if (pte_is_pinned(vma, addr, ptent)) + return; old_pte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); @@ -1215,41 +1236,26 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .type = type, }; + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } if (type == CLEAR_REFS_MM_HIWATER_RSS) { - if (mmap_write_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } - /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. 
*/ reset_mm_hiwater_rss(mm); - mmap_write_unlock(mm); - goto out_mm; + goto out_unlock; } - if (mmap_read_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } tlb_gather_mmu(&tlb, mm, 0, -1); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; - mmap_read_unlock(mm); - if (mmap_write_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } - for (vma = mm->mmap; vma; vma = vma->vm_next) { - vma->vm_flags &= ~VM_SOFTDIRTY; - vma_set_page_prot(vma); - } - mmap_write_downgrade(mm); - break; + vma->vm_flags &= ~VM_SOFTDIRTY; + vma_set_page_prot(vma); } mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, @@ -1261,7 +1267,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, 0, -1); - mmap_read_unlock(mm); +out_unlock: + mmap_write_unlock(mm); out_mm: mmput(mm); } diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index e59d4bb3a89e44..eafb75755fa371 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -320,7 +320,8 @@ static int mountstats_open(struct inode *inode, struct file *file) const struct file_operations proc_mounts_operations = { .open = mounts_open, - .read = seq_read, + .read_iter = seq_read_iter, + .splice_read = generic_file_splice_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, @@ -328,7 +329,8 @@ const struct file_operations proc_mounts_operations = { const struct file_operations proc_mountinfo_operations = { .open = mountinfo_open, - .read = seq_read, + .read_iter = seq_read_iter, + .splice_read = generic_file_splice_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, @@ -336,7 +338,8 @@ const struct file_operations proc_mountinfo_operations = { const struct file_operations proc_mountstats_operations = { .open = mountstats_open, - .read = seq_read, + .read_iter = seq_read_iter, + 
.splice_read = generic_file_splice_read, .llseek = seq_lseek, .release = mounts_release, }; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index bb02989d92b618..4f137346376600 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2455,7 +2455,7 @@ int dquot_resume(struct super_block *sb, int type) ret = dquot_load_quota_sb(sb, cnt, dqopt->info[cnt].dqi_fmt_id, flags); if (ret < 0) - vfs_cleanup_quota_inode(sb, type); + vfs_cleanup_quota_inode(sb, cnt); } return ret; diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index a6f856f341dc7b..c5562c871c8bef 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -62,7 +62,7 @@ static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) memset(buf, 0, info->dqi_usable_bs); return sb->s_op->quota_read(sb, info->dqi_type, buf, - info->dqi_usable_bs, blk << info->dqi_blocksize_bits); + info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits); } static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) @@ -71,7 +71,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) ssize_t ret; ret = sb->s_op->quota_write(sb, info->dqi_type, buf, - info->dqi_usable_bs, blk << info->dqi_blocksize_bits); + info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits); if (ret != info->dqi_usable_bs) { quota_error(sb, "dquota write failed"); if (ret >= 0) @@ -284,7 +284,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, blk); goto out_buf; } - dquot->dq_off = (blk << info->dqi_blocksize_bits) + + dquot->dq_off = ((loff_t)blk << info->dqi_blocksize_bits) + sizeof(struct qt_disk_dqdbheader) + i * info->dqi_entry_size; kfree(buf); @@ -559,7 +559,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, ret = -EIO; goto out_buf; } else { - ret = (blk << info->dqi_blocksize_bits) + sizeof(struct + ret = ((loff_t)blk << info->dqi_blocksize_bits) + sizeof(struct qt_disk_dqdbheader) + i * info->dqi_entry_size; } out_buf: diff --git 
a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index e69a2bfdd81c05..c21106557a37e2 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -157,6 +157,25 @@ static int v2_read_file_info(struct super_block *sb, int type) qinfo->dqi_entry_size = sizeof(struct v2r1_disk_dqblk); qinfo->dqi_ops = &v2r1_qtree_ops; } + ret = -EUCLEAN; + /* Some sanity checks of the read headers... */ + if ((loff_t)qinfo->dqi_blocks << qinfo->dqi_blocksize_bits > + i_size_read(sb_dqopt(sb)->files[type])) { + quota_error(sb, "Number of blocks too big for quota file size (%llu > %llu).", + (loff_t)qinfo->dqi_blocks << qinfo->dqi_blocksize_bits, + i_size_read(sb_dqopt(sb)->files[type])); + goto out; + } + if (qinfo->dqi_free_blk >= qinfo->dqi_blocks) { + quota_error(sb, "Free block number too big (%u >= %u).", + qinfo->dqi_free_blk, qinfo->dqi_blocks); + goto out; + } + if (qinfo->dqi_free_entry >= qinfo->dqi_blocks) { + quota_error(sb, "Block with free entry too big (%u >= %u).", + qinfo->dqi_free_entry, qinfo->dqi_blocks); + goto out; + } ret = 0; out: up_read(&dqopt->dqio_sem); diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 8bf88d690729e3..476a7ff494822f 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -454,6 +454,12 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) "(second one): %h", ih); return 0; } + if (is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) { + reiserfs_warning(NULL, "reiserfs-5093", + "item entry count seems wrong %h", + ih); + return 0; + } prev_location = ih_location(ih); } diff --git a/fs/select.c b/fs/select.c index ebfebdfe5c69a1..37aaa8317f3ae1 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1011,14 +1011,17 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, fdcount = do_poll(head, &table, end_time); poll_freewait(&table); + if (!user_write_access_begin(ufds, nfds * sizeof(*ufds))) + goto out_fds; + for (walk = head; walk; walk = walk->next) { struct pollfd *fds = 
walk->entries; int j; - for (j = 0; j < walk->len; j++, ufds++) - if (__put_user(fds[j].revents, &ufds->revents)) - goto out_fds; + for (j = walk->len; j; fds++, ufds++, j--) + unsafe_put_user(fds->revents, &ufds->revents, Efault); } + user_write_access_end(); err = fdcount; out_fds: @@ -1030,6 +1033,11 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, } return err; + +Efault: + user_write_access_end(); + err = -EFAULT; + goto out_fds; } static long do_restart_poll(struct restart_block *restart_block) diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c index b93b3cd10bfd32..8c50de693e1d4f 100644 --- a/fs/ubifs/auth.c +++ b/fs/ubifs/auth.c @@ -338,8 +338,10 @@ int ubifs_init_authentication(struct ubifs_info *c) c->authenticated = true; c->log_hash = ubifs_hash_get_desc(c); - if (IS_ERR(c->log_hash)) + if (IS_ERR(c->log_hash)) { + err = PTR_ERR(c->log_hash); goto out_free_hmac; + } err = 0; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 155521e51ac576..08fde777c32473 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -270,6 +270,15 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } +static int ubifs_prepare_create(struct inode *dir, struct dentry *dentry, + struct fscrypt_name *nm) +{ + if (fscrypt_is_nokey_name(dentry)) + return -ENOKEY; + + return fscrypt_setup_filename(dir, &dentry->d_name, 0, nm); +} + static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { @@ -293,7 +302,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (err) return err; - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); + err = ubifs_prepare_create(dir, dentry, &nm); if (err) goto out_budg; @@ -953,7 +962,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) return err; - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); + err = ubifs_prepare_create(dir, dentry, &nm); if 
(err) goto out_budg; @@ -1038,7 +1047,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, return err; } - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); + err = ubifs_prepare_create(dir, dentry, &nm); if (err) { kfree(dev); goto out_budg; @@ -1122,7 +1131,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, if (err) return err; - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); + err = ubifs_prepare_create(dir, dentry, &nm); if (err) goto out_budg; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 7e4bfaf2871fa0..eae9cf5a57b059 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -319,7 +319,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) { uint32_t crc; - ubifs_assert(c, pad >= 0 && !(pad & 7)); + ubifs_assert(c, pad >= 0); if (pad >= UBIFS_PAD_NODE_SZ) { struct ubifs_ch *ch = buf; @@ -764,6 +764,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) * write-buffer. */ memcpy(wbuf->buf + wbuf->used, buf, len); + if (aligned_len > len) { + ubifs_assert(c, aligned_len - len < 8); + ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len); + } if (aligned_len == wbuf->avail) { dbg_io("flush jhead %s wbuf to LEB %d:%d", @@ -856,13 +860,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) } spin_lock(&wbuf->lock); - if (aligned_len) + if (aligned_len) { /* * And now we have what's left and what does not take whole * max. write unit, so write it to the write-buffer and we are * done. 
*/ memcpy(wbuf->buf, buf + written, len); + if (aligned_len > len) { + ubifs_assert(c, aligned_len - len < 8); + ubifs_pad(c, wbuf->buf + len, aligned_len - len); + } + } if (c->leb_size - wbuf->offs >= c->max_write_size) wbuf->size = c->max_write_size; diff --git a/fs/udf/super.c b/fs/udf/super.c index 5bef3a68395d8d..d0df217f4712a5 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -705,6 +705,7 @@ static int udf_check_vsd(struct super_block *sb) struct buffer_head *bh = NULL; int nsr = 0; struct udf_sb_info *sbi; + loff_t session_offset; sbi = UDF_SB(sb); if (sb->s_blocksize < sizeof(struct volStructDesc)) @@ -712,7 +713,8 @@ static int udf_check_vsd(struct super_block *sb) else sectorsize = sb->s_blocksize; - sector += (((loff_t)sbi->s_session) << sb->s_blocksize_bits); + session_offset = (loff_t)sbi->s_session << sb->s_blocksize_bits; + sector += session_offset; udf_debug("Starting at sector %u (%lu byte sectors)\n", (unsigned int)(sector >> sb->s_blocksize_bits), @@ -757,8 +759,7 @@ static int udf_check_vsd(struct super_block *sb) if (nsr > 0) return 1; - else if (!bh && sector - (sbi->s_session << sb->s_blocksize_bits) == - VSD_FIRST_SECTOR_OFFSET) + else if (!bh && sector - session_offset == VSD_FIRST_SECTOR_OFFSET) return -1; else return 0; diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig index ef2697b78820d4..827278f937fe7f 100644 --- a/fs/zonefs/Kconfig +++ b/fs/zonefs/Kconfig @@ -3,6 +3,7 @@ config ZONEFS_FS depends on BLOCK depends on BLK_DEV_ZONED select FS_IOMAP + select CRC32 help zonefs is a simple file system which exposes zones of a zoned block device (e.g. host-managed or host-aware SMR disk drives) as files. 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index a3abcc4b7d9ff7..6d1879bf94403e 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -620,7 +620,6 @@ acpi_status acpi_remove_pm_notifier(struct acpi_device *adev); bool acpi_pm_device_can_wakeup(struct device *dev); int acpi_pm_device_sleep_state(struct device *, int *, int); int acpi_pm_set_device_wakeup(struct device *dev, bool enable); -int acpi_pm_set_bridge_wakeup(struct device *dev, bool enable); #else static inline void acpi_pm_wakeup_event(struct device *dev) { @@ -651,10 +650,6 @@ static inline int acpi_pm_set_device_wakeup(struct device *dev, bool enable) { return -ENODEV; } -static inline int acpi_pm_set_bridge_wakeup(struct device *dev, bool enable) -{ - return -ENODEV; -} #endif #ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index e78bbb9a07e90b..d1300c6e0a471c 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -34,6 +34,7 @@ mandatory-y += kmap_types.h mandatory-y += kprobes.h mandatory-y += linkage.h mandatory-y += local.h +mandatory-y += local64.h mandatory-y += mm-arch-hooks.h mandatory-y += mmiowb.h mandatory-y += mmu.h diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index dd90c9792909d1..0e7316a86240b8 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -11,19 +11,19 @@ * See Documentation/atomic_bitops.txt for details. 
*/ -static inline void set_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void set_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p); } -static inline void clear_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void clear_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p); } -static inline void change_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void change_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p); diff --git a/include/dt-bindings/sound/apq8016-lpass.h b/include/dt-bindings/sound/apq8016-lpass.h index 3c3e16c0aadbfd..dc605c4bc22491 100644 --- a/include/dt-bindings/sound/apq8016-lpass.h +++ b/include/dt-bindings/sound/apq8016-lpass.h @@ -2,9 +2,8 @@ #ifndef __DT_APQ8016_LPASS_H #define __DT_APQ8016_LPASS_H -#define MI2S_PRIMARY 0 -#define MI2S_SECONDARY 1 -#define MI2S_TERTIARY 2 -#define MI2S_QUATERNARY 3 +#include + +/* NOTE: Use qcom,lpass.h to define any AIF ID's for LPASS */ #endif /* __DT_APQ8016_LPASS_H */ diff --git a/include/dt-bindings/sound/qcom,lpass.h b/include/dt-bindings/sound/qcom,lpass.h new file mode 100644 index 00000000000000..7b0b80b38699e4 --- /dev/null +++ b/include/dt-bindings/sound/qcom,lpass.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __DT_QCOM_LPASS_H +#define __DT_QCOM_LPASS_H + +#define MI2S_PRIMARY 0 +#define MI2S_SECONDARY 1 +#define MI2S_TERTIARY 2 +#define MI2S_QUATERNARY 3 +#define MI2S_QUINARY 4 + +#define LPASS_DP_RX 5 + +#define LPASS_MCLK0 0 + +#endif /* __DT_QCOM_LPASS_H */ diff --git a/include/dt-bindings/sound/sc7180-lpass.h b/include/dt-bindings/sound/sc7180-lpass.h index 56ecaafd2dc682..5c1ee8b36b1972 100644 --- a/include/dt-bindings/sound/sc7180-lpass.h +++ b/include/dt-bindings/sound/sc7180-lpass.h @@ -2,10 +2,8 
@@ #ifndef __DT_SC7180_LPASS_H #define __DT_SC7180_LPASS_H -#define MI2S_PRIMARY 0 -#define MI2S_SECONDARY 1 -#define LPASS_DP_RX 2 +#include <dt-bindings/sound/qcom,lpass.h> -#define LPASS_MCLK0 0 +/* NOTE: Use qcom,lpass.h to define any AIF ID's for LPASS */ #endif /* __DT_APQ8016_LPASS_H */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 39263c6b52e1aa..5b1dc1ad4fb322 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -885,6 +885,13 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } +static inline struct platform_device * +acpi_create_platform_device(struct acpi_device *adev, + struct property_entry *properties) +{ + return NULL; +} + static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 794b2a33a2c36c..f8ea27423d1d85 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -446,8 +446,8 @@ enum { BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), - /* set RQF_PREEMPT */ - BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), + /* set RQF_PM */ + BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), }; struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 033eb5f73b6540..542471b76f4106 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -79,9 +79,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) -/* set for "ide_preempt" requests and also for requests for which the SCSI - "quiesce" state must be ignored. */ -#define RQF_PREEMPT ((__force req_flags_t)(1 << 8)) /* vaguely specified driver internal error.
Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ @@ -430,8 +427,7 @@ struct request_queue { unsigned long queue_flags; /* * Number of contexts that have called blk_set_pm_only(). If this - * counter is above zero then only RQF_PM and RQF_PREEMPT requests are - * processed. + * counter is above zero then only RQF_PM requests are processed. */ atomic_t pm_only; @@ -696,6 +692,18 @@ static inline bool queue_is_mq(struct request_queue *q) return q->mq_ops; } +#ifdef CONFIG_PM +static inline enum rpm_status queue_rpm_status(struct request_queue *q) +{ + return q->rpm_status; +} +#else +static inline enum rpm_status queue_rpm_status(struct request_queue *q) +{ + return RPM_ACTIVE; +} +#endif + static inline enum blk_zoned_model blk_queue_zoned_model(struct request_queue *q) { diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 74c6c0486eed78..555ab0fddbef7d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -13,6 +13,12 @@ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ #if GCC_VERSION < 40900 # error Sorry, your version of GCC is too old - please use 4.9 or newer. +#elif defined(CONFIG_ARM64) && GCC_VERSION < 50100 +/* + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293 + * https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk + */ +# error Sorry, your version of GCC is too old - please use 5.1 or newer. #endif /* diff --git a/include/linux/device.h b/include/linux/device.h index 5ed101be7b2e7d..2b39de35525a90 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -615,6 +615,18 @@ static inline const char *dev_name(const struct device *dev) return kobject_name(&dev->kobj); } +/** + * dev_bus_name - Return a device's bus/class name, if at all possible + * @dev: struct device to get the bus/class name of + * + * Will return the name of the bus/class the device is attached to. 
If it is + * not attached to a bus/class, an empty string will be returned. + */ +static inline const char *dev_bus_name(const struct device *dev) +{ + return dev->bus ? dev->bus->name : (dev->class ? dev->class->name : ""); +} + __printf(2, 3) int dev_set_name(struct device *dev, const char *name, ...); #ifdef CONFIG_NUMA diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 29d255fdd5d641..90bd558a17f516 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -150,6 +150,7 @@ void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n); unsigned dm_bufio_get_block_size(struct dm_bufio_client *c); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c); +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c); sector_t dm_bufio_get_block_number(struct dm_buffer *b); void *dm_bufio_get_block_data(struct dm_buffer *b); void *dm_bufio_get_aux_data(struct dm_buffer *b); diff --git a/include/linux/fs.h b/include/linux/fs.h index 8667d0cdc71e76..8bde32cf971159 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2878,8 +2878,7 @@ extern int inode_needs_sync(struct inode *inode); extern int generic_delete_inode(struct inode *inode); static inline int generic_drop_inode(struct inode *inode) { - return !inode->i_nlink || inode_unhashed(inode) || - (inode->i_state & I_DONTCACHE); + return !inode->i_nlink || inode_unhashed(inode); } extern void d_mark_dontcache(struct inode *inode); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index a8f7a43f031bd7..8e1d31c959bfae 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -111,6 +111,35 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry) dentry->d_flags &= ~DCACHE_NOKEY_NAME; } +/** + * fscrypt_is_nokey_name() - test whether a dentry is a no-key name + * @dentry: the dentry to check + * + * This returns true if the dentry is a no-key dentry. 
A no-key dentry is a + * dentry that was created in an encrypted directory that hasn't had its + * encryption key added yet. Such dentries may be either positive or negative. + * + * When a filesystem is asked to create a new filename in an encrypted directory + * and the new filename's dentry is a no-key dentry, it must fail the operation + * with ENOKEY. This includes ->create(), ->mkdir(), ->mknod(), ->symlink(), + * ->rename(), and ->link(). (However, ->rename() and ->link() are already + * handled by fscrypt_prepare_rename() and fscrypt_prepare_link().) + * + * This is necessary because creating a filename requires the directory's + * encryption key, but just checking for the key on the directory inode during + * the final filesystem operation doesn't guarantee that the key was available + * during the preceding dentry lookup. And the key must have already been + * available during the dentry lookup in order for it to have been checked + * whether the filename already exists in the directory and for the new file's + * dentry not to be invalidated due to it incorrectly having the no-key flag. + * + * Return: %true if the dentry is a no-key name + */ +static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) +{ + return dentry->d_flags & DCACHE_NOKEY_NAME; +} + /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); @@ -244,6 +273,11 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry) { } +static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) +{ + return false; +} + /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index f8529a3a29234b..a2e42d3cd87cfa 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -137,6 +137,7 @@ struct mem_cgroup; * if @file_name is not NULL, this is the directory that * @file_name is relative to. 
* @file_name: optional file name associated with event + * @cookie: inotify rename cookie * * free_group_priv - called when a group refcnt hits 0 to clean up the private union * freeing_mark - called when a mark is being destroyed for some reason. The group @@ -151,7 +152,7 @@ struct fsnotify_ops { struct fsnotify_iter_info *iter_info); int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *file_name); + const struct qstr *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); @@ -277,7 +278,7 @@ static inline const struct path *fsnotify_data_path(const void *data, enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, - FSNOTIFY_OBJ_TYPE_CHILD, + FSNOTIFY_OBJ_TYPE_PARENT, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, @@ -285,7 +286,7 @@ enum fsnotify_obj_type { }; #define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) -#define FSNOTIFY_OBJ_TYPE_CHILD_FL (1U << FSNOTIFY_OBJ_TYPE_CHILD) +#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT) #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) #define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) #define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) @@ -330,7 +331,7 @@ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ } FSNOTIFY_ITER_FUNCS(inode, INODE) -FSNOTIFY_ITER_FUNCS(child, CHILD) +FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index a3a838dcf8e4a0..7199280d89ca48 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -79,8 +79,12 @@ struct ad_sigma_delta { /* * DMA (thus cache 
coherency maintenance) requires the * transfer buffers to live in their own cache lines. + * 'tx_buf' is up to 32 bits. + * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, + * rounded to 16 bytes to take into account padding. */ - uint8_t data[4] ____cacheline_aligned; + uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t rx_buf[16] __aligned(8); }; static inline int ad_sigma_delta_set_channel(struct ad_sigma_delta *sd, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d956987ed032db..94522685a0d947 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -758,6 +758,7 @@ struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; + struct intel_iommu *iommu; struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; @@ -771,7 +772,6 @@ struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; - struct intel_iommu *iommu; unsigned int flags; u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h index 85b5151911cfd0..4856706fbfeb45 100644 --- a/include/linux/kdev_t.h +++ b/include/linux/kdev_t.h @@ -21,61 +21,61 @@ }) /* acceptable for old filesystems */ -static inline bool old_valid_dev(dev_t dev) +static __always_inline bool old_valid_dev(dev_t dev) { return MAJOR(dev) < 256 && MINOR(dev) < 256; } -static inline u16 old_encode_dev(dev_t dev) +static __always_inline u16 old_encode_dev(dev_t dev) { return (MAJOR(dev) << 8) | MINOR(dev); } -static inline dev_t old_decode_dev(u16 val) +static __always_inline dev_t old_decode_dev(u16 val) { return MKDEV((val >> 8) & 255, val & 255); } -static inline u32 new_encode_dev(dev_t dev) +static __always_inline u32 new_encode_dev(dev_t dev) { unsigned major = MAJOR(dev); unsigned minor = MINOR(dev); return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); } -static inline dev_t new_decode_dev(u32 dev) +static __always_inline dev_t 
new_decode_dev(u32 dev) { unsigned major = (dev & 0xfff00) >> 8; unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00); return MKDEV(major, minor); } -static inline u64 huge_encode_dev(dev_t dev) +static __always_inline u64 huge_encode_dev(dev_t dev) { return new_encode_dev(dev); } -static inline dev_t huge_decode_dev(u64 dev) +static __always_inline dev_t huge_decode_dev(u64 dev) { return new_decode_dev(dev); } -static inline int sysv_valid_dev(dev_t dev) +static __always_inline int sysv_valid_dev(dev_t dev) { return MAJOR(dev) < (1<<14) && MINOR(dev) < (1<<18); } -static inline u32 sysv_encode_dev(dev_t dev) +static __always_inline u32 sysv_encode_dev(dev_t dev) { return MINOR(dev) | (MAJOR(dev) << 18); } -static inline unsigned sysv_major(u32 dev) +static __always_inline unsigned sysv_major(u32 dev) { return (dev >> 18) & 0x3fff; } -static inline unsigned sysv_minor(u32 dev) +static __always_inline unsigned sysv_minor(u32 dev) { return dev & 0x3ffff; } diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 65b81e0c494d20..2484ed97e72f58 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -33,6 +33,9 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), unsigned int cpu, const char *namefmt); +void kthread_set_per_cpu(struct task_struct *k, int cpu); +bool kthread_is_per_cpu(struct task_struct *k); + /** * kthread_run - create and wake a thread. * @threadfn: the function to run until signal_pending(current). diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 5bcfbd972e9709..dbf8506decca0f 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -178,6 +178,11 @@ * Objtool generates debug info for both FUNC & CODE, but needs special * annotations for each CODE's start (to describe the actual stack frame). * + * Objtool requires that all code must be contained in an ELF symbol. Symbol + * names that have a .L prefix do not emit symbol table entries. 
.L + * prefixed symbols can be used within a code region, but should be avoided for + * denoting a range of code via ``SYM_*_START/END`` annotations. + * * ALIAS -- does not generate debug info -- the aliased function will */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0f23e1ed5e7109..add85094f9a58c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1213,22 +1213,4 @@ static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) return val.vbool; } -/** - * mlx5_core_net - Provide net namespace of the mlx5_core_dev - * @dev: mlx5 core device - * - * mlx5_core_net() returns the net namespace of mlx5 core device. - * This can be called only in below described limited context. - * (a) When a devlink instance for mlx5_core is registered and - * when devlink reload operation is disabled. - * or - * (b) during devlink reload reload_down() and reload_up callbacks - * where it is ensured that devlink instance's net namespace is - * stable. 
- */ -static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) -{ - return devlink_net(priv_to_devlink(dev)); -} - #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d3fb4edc..cd5c313729ea1d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2439,8 +2439,9 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn, #endif extern void set_dma_reserve(unsigned long new_dma_reserve); -extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, - enum meminit_context, struct vmem_altmap *, int migratetype); +extern void memmap_init_zone(unsigned long, int, unsigned long, + unsigned long, unsigned long, enum meminit_context, + struct vmem_altmap *, int migratetype); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5a9238f6caad97..915f4f100383b5 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -446,6 +447,13 @@ struct mm_struct { */ atomic_t has_pinned; + /** + * @write_protect_seq: Locked when any thread is write + * protecting pages mapped by this mm to enforce a later COW, + * for instance during page table copying for fork(). 
+ */ + seqcount_t write_protect_seq; + #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d925359976873a..bfed36e342ccb2 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -116,6 +116,9 @@ enum { NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer * Location */ + NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory + * Space Control + */ NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ @@ -135,6 +138,7 @@ enum { #define NVME_CAP_CSS(cap) (((cap) >> 37) & 0xff) #define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) #define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) +#define NVME_CAP_CMBS(cap) (((cap) >> 57) & 0x1) #define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) #define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) @@ -192,6 +196,8 @@ enum { NVME_CSTS_SHST_OCCUR = 1 << 2, NVME_CSTS_SHST_CMPLT = 2 << 2, NVME_CSTS_SHST_MASK = 3 << 2, + NVME_CMBMSC_CRE = 1 << 0, + NVME_CMBMSC_CMSE = 1 << 1, }; struct nvme_id_power_state { diff --git a/include/linux/of.h b/include/linux/of.h index 5d51891cbf1a68..af655d264f10fa 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1300,6 +1300,7 @@ static inline int of_get_available_child_count(const struct device_node *np) #define _OF_DECLARE(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ __used __section("__" #table "_of_table") \ + __aligned(__alignof__(struct of_device_id)) \ = { .compatible = compat, \ .data = (fn == (fn_type)NULL) ? 
fn : fn } #else diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 270cab43ca3dad..000cc0533c3366 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -80,6 +80,7 @@ extern void proc_flush_pid(struct pid *); extern struct proc_dir_entry *proc_symlink(const char *, struct proc_dir_entry *, const char *); +struct proc_dir_entry *_proc_mkdir(const char *, umode_t, struct proc_dir_entry *, void *, bool); extern struct proc_dir_entry *proc_mkdir(const char *, struct proc_dir_entry *); extern struct proc_dir_entry *proc_mkdir_data(const char *, umode_t, struct proc_dir_entry *, void *); @@ -162,6 +163,11 @@ static inline struct proc_dir_entry *proc_symlink(const char *name, static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} static inline struct proc_dir_entry *proc_create_mount_point(const char *name) { return NULL; } +static inline struct proc_dir_entry *_proc_mkdir(const char *name, umode_t mode, + struct proc_dir_entry *parent, void *data, bool force_lookup) +{ + return NULL; +} static inline struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, struct proc_dir_entry *parent, void *data) { return NULL; } static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, @@ -199,7 +205,7 @@ struct net; static inline struct proc_dir_entry *proc_net_mkdir( struct net *net, const char *name, struct proc_dir_entry *parent) { - return proc_mkdir_data(name, 0, parent, net); + return _proc_mkdir(name, 0, parent, net, true); } struct ns_common; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6cdd0152c253ae..5c119d6cecf14c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -86,6 +86,12 @@ void rcu_sched_clock_irq(int user); void rcu_report_dead(unsigned int cpu); void rcutree_migrate_callbacks(int cpu); +#ifdef CONFIG_TASKS_RCU_GENERIC +void rcu_init_tasks_generic(void); +#else +static inline void 
rcu_init_tasks_generic(void) { } +#endif + #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 3a6adfa70fb0ef..70085ca1a3fc9d 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -91,7 +91,6 @@ enum ttu_flags { TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ - TTU_IGNORE_ACCESS = 0x10, /* don't age */ TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 25e3fde8561781..4c715be4871719 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -123,6 +123,7 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) * lock for reading */ extern void down_read(struct rw_semaphore *sem); +extern int __must_check down_read_interruptible(struct rw_semaphore *sem); extern int __must_check down_read_killable(struct rw_semaphore *sem); /* @@ -171,6 +172,7 @@ extern void downgrade_write(struct rw_semaphore *sem); * See Documentation/locking/lockdep-design.rst for more details.) 
*/ extern void down_read_nested(struct rw_semaphore *sem, int subclass); +extern int __must_check down_read_killable_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass); extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); @@ -191,6 +193,7 @@ extern void down_read_non_owner(struct rw_semaphore *sem); extern void up_read_non_owner(struct rw_semaphore *sem); #else # define down_read_nested(sem, subclass) down_read(sem) +# define down_read_killable_nested(sem, subclass) down_read_killable(sem) # define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) # define down_write_killable_nested(sem, subclass) down_write_killable(sem) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1bad18a1d8ba71..4b6a8234d7fc23 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -228,12 +228,13 @@ struct signal_struct { * credential calculations * (notably. ptrace) * Deprecated do not use in new code. - * Use exec_update_mutex instead. - */ - struct mutex exec_update_mutex; /* Held while task_struct is being - * updated during exec, and may have - * inconsistent permissions. + * Use exec_update_lock instead. */ + struct rw_semaphore exec_update_lock; /* Held while task_struct is + * being updated during exec, + * and may have inconsistent + * permissions. 
+ */ } __randomize_layout; /* diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index fb0205d87d3c1d..9d6c28cc4d8f26 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -30,7 +30,7 @@ static inline void seq_buf_clear(struct seq_buf *s) } static inline void -seq_buf_init(struct seq_buf *s, unsigned char *buf, unsigned int size) +seq_buf_init(struct seq_buf *s, char *buf, unsigned int size) { s->buffer = buf; s->size = size; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a603d48d2b2cd5..3ac5037d1c3da2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -330,6 +330,7 @@ struct xprt_class { struct rpc_xprt * (*setup)(struct xprt_create *); struct module *owner; char name[32]; + const char * netid[]; }; /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 37bea07c12f211..aea0ce9f3b745a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* __SYSCALL_DEFINEx */ +/* For split 64-bit arguments on 32-bit architectures */ +#ifdef __LITTLE_ENDIAN +#define SC_ARG64(name) u32, name##_lo, u32, name##_hi +#else +#define SC_ARG64(name) u32, name##_hi, u32, name##_lo +#endif +#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo) + +#ifdef CONFIG_COMPAT +#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1 +#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2 +#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3 +#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4 +#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5 +#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6 +#else +#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1 +#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2 +#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3 +#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4 +#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5 +#define 
SYSCALL32_DEFINE6 SYSCALL_DEFINE6 +#endif + /* * Called before coming back to user-mode. Returning to user-mode with an * address limit different than USER_DS can allow to overwrite kernel memory. diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 6c30508fca1989..5a2c650d9e1c10 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -12,7 +12,7 @@ */ struct trace_seq { - unsigned char buffer[PAGE_SIZE]; + char buffer[PAGE_SIZE]; struct seq_buf seq; int full; }; @@ -51,7 +51,7 @@ static inline int trace_seq_used(struct trace_seq *s) * that is about to be written to and then return the result * of that write. */ -static inline unsigned char * +static inline char * trace_seq_buffer_ptr(struct trace_seq *s) { return s->buffer + seq_buf_used(&s->seq); diff --git a/include/linux/tty.h b/include/linux/tty.h index eb33d948788cc3..bc8caac390fce9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -422,6 +422,7 @@ extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); extern void tty_ldisc_unlock(struct tty_struct *tty); +extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); #else static inline void tty_kref_put(struct tty_struct *tty) { } diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 4a19ac3f24d068..6b03fdd69d2745 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -84,6 +84,8 @@ /* Cannot handle REPORT_LUNS */ \ US_FLAG(ALWAYS_SYNC, 0x20000000) \ /* lies about caching, so always sync */ \ + US_FLAG(NO_SAME, 0x40000000) \ + /* Cannot handle WRITE_SAME */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; diff --git a/include/media/v4l2-fwnode.h b/include/media/v4l2-fwnode.h index c0907427654319..ed0840f3d5dff4 100644 --- a/include/media/v4l2-fwnode.h +++ b/include/media/v4l2-fwnode.h @@ -231,6 
+231,9 @@ struct v4l2_fwnode_connector { * guessing @vep.bus_type between CSI-2 D-PHY, parallel and BT.656 busses is * supported. NEVER RELY ON GUESSING @vep.bus_type IN NEW DRIVERS! * + * The caller is required to initialise all fields of @vep, either with + * explicit values, or by zeroing them. + * * The function does not change the V4L2 fwnode endpoint state if it fails. * * NOTE: This function does not parse properties the size of which is variable @@ -273,6 +276,9 @@ void v4l2_fwnode_endpoint_free(struct v4l2_fwnode_endpoint *vep); * guessing @vep.bus_type between CSI-2 D-PHY, parallel and BT.656 busses is * supported. NEVER RELY ON GUESSING @vep.bus_type IN NEW DRIVERS! * + * The caller is required to initialise all fields of @vep, either with + * explicit values, or by zeroing them. + * * The function does not change the V4L2 fwnode endpoint state if it fails. * * v4l2_fwnode_endpoint_alloc_parse() has two important differences to diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h index 59b1de1971142f..c20e2dc6d4320d 100644 --- a/include/media/v4l2-mediabus.h +++ b/include/media/v4l2-mediabus.h @@ -103,6 +103,7 @@ * @V4L2_MBUS_CCP2: CCP2 (Compact Camera Port 2) * @V4L2_MBUS_CSI2_DPHY: MIPI CSI-2 serial interface, with D-PHY * @V4L2_MBUS_CSI2_CPHY: MIPI CSI-2 serial interface, with C-PHY + * @V4L2_MBUS_INVALID: invalid bus type (keep as last) */ enum v4l2_mbus_type { V4L2_MBUS_UNKNOWN, @@ -112,6 +113,7 @@ enum v4l2_mbus_type { V4L2_MBUS_CCP2, V4L2_MBUS_CSI2_DPHY, V4L2_MBUS_CSI2_CPHY, + V4L2_MBUS_INVALID, }; /** diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 7338b3865a2a3d..111d7771b20815 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -76,6 +76,8 @@ struct inet_connection_sock_af_ops { * @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options) * @icsk_ack: Delayed ACK control data * @icsk_mtup; MTU probing control data + *
@icsk_probes_tstamp: Probe timestamp (cleared by non-zero window ack) + * @icsk_user_timeout: TCP_USER_TIMEOUT value */ struct inet_connection_sock { /* inet_sock has to be the first member! */ @@ -129,6 +131,7 @@ struct inet_connection_sock { u32 probe_timestamp; } icsk_mtup; + u32 icsk_probes_tstamp; u32 icsk_user_timeout; u64 icsk_ca_priv[104 / sizeof(u64)]; diff --git a/include/net/red.h b/include/net/red.h index fc455445f4b226..932f0d79d60cbb 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -168,12 +168,14 @@ static inline void red_set_vars(struct red_vars *v) v->qcount = -1; } -static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog) +static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, u8 Scell_log) { if (fls(qth_min) + Wlog > 32) return false; if (fls(qth_max) + Wlog > 32) return false; + if (Scell_log >= 32) + return false; if (qth_max < qth_min) return false; return true; diff --git a/include/net/sock.h b/include/net/sock.h index a5c6ae78df77d0..253202dcc5e61a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1903,10 +1903,13 @@ static inline void sk_set_txhash(struct sock *sk) sk->sk_txhash = net_tx_rndhash(); } -static inline void sk_rethink_txhash(struct sock *sk) +static inline bool sk_rethink_txhash(struct sock *sk) { - if (sk->sk_txhash) + if (sk->sk_txhash) { sk_set_txhash(sk); + return true; + } + return false; } static inline struct dst_entry * @@ -1929,12 +1932,10 @@ sk_dst_get(struct sock *sk) return dst; } -static inline void dst_negative_advice(struct sock *sk) +static inline void __dst_negative_advice(struct sock *sk) { struct dst_entry *ndst, *dst = __sk_dst_get(sk); - sk_rethink_txhash(sk); - if (dst && dst->ops->negative_advice) { ndst = dst->ops->negative_advice(dst); @@ -1946,6 +1947,12 @@ static inline void dst_negative_advice(struct sock *sk) } } +static inline void dst_negative_advice(struct sock *sk) +{ + sk_rethink_txhash(sk); + __dst_negative_advice(sk); +} + static inline void 
__sk_dst_set(struct sock *sk, struct dst_entry *dst) { diff --git a/include/net/tcp.h b/include/net/tcp.h index d4ef5bf941689c..fe9747ee70a6f8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -625,6 +625,7 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); unsigned int tcp_current_mss(struct sock *sk); +u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) @@ -2065,7 +2066,7 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb); void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced); extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd); -extern void tcp_rack_mark_lost(struct sock *sk); +extern bool tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 4f4e93bf814c3e..cc17bc95754825 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -58,10 +58,6 @@ struct xdp_sock { struct xsk_queue *tx ____cacheline_aligned_in_smp; struct list_head tx_list; - /* Mutual exclusion of NAPI TX thread and sendmsg error paths - * in the SKB destructor callback. - */ - spinlock_t tx_completion_lock; /* Protects generic receive. */ spinlock_t rx_lock; diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 01755b838c7450..eaa8386dbc630b 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -73,6 +73,11 @@ struct xsk_buff_pool { bool dma_need_sync; bool unaligned; void *addrs; + /* Mutual exclusion of the completion ring in the SKB mode. 
Two cases to protect: + * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when + * sockets share a single cq when the same netdev and queue id is shared. + */ + spinlock_t cq_lock; struct xdp_buff_xsk *free_heads[]; }; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9bf6c319a670e2..65771bef5e654a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3943,6 +3943,16 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) -ENOSYS; } +/* + * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to + * NULL. This causes the ib_dma* helpers to just stash the kernel virtual + * address into the dma address. + */ +static inline bool ib_uses_virt_dma(struct ib_device *dev) +{ + return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; +} + /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created @@ -3950,6 +3960,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { + if (ib_uses_virt_dma(dev)) + return 0; return dma_mapping_error(dev->dma_device, dma_addr); } @@ -3964,6 +3976,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)cpu_addr; return dma_map_single(dev->dma_device, cpu_addr, size, direction); } @@ -3978,7 +3992,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3995,6 +4010,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)(page_address(page) + 
offset); return dma_map_page(dev->dma_device, page, offset, size, direction); } @@ -4009,7 +4026,30 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_page(dev->dma_device, addr, size, direction); +} + +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents); +static inline int ib_dma_map_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (ib_uses_virt_dma(dev)) + return ib_dma_virt_map_sg(dev, sg, nents); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); +} + +static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } /** @@ -4023,7 +4063,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(dev->dma_device, sg, nents, direction); + return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4037,24 +4077,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(dev->dma_device, sg, nents, direction); -} - -static inline int ib_dma_map_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, - dma_attrs); -} - -static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - 
dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); + ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4065,6 +4088,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { + if (ib_uses_virt_dma(dev)) + return UINT_MAX; return dma_get_max_seg_size(dev->dma_device); } @@ -4080,7 +4105,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -4095,7 +4121,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /** diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index b00270c72740f5..94fac55772f57d 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -862,6 +862,16 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, { return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); } + +static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle, + size_t n, size_t size) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return ERR_PTR(-EOVERFLOW); + return uverbs_zalloc(bundle, bytes); +} int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val); diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index f1ce2c4c077e2f..ec84ad10656834 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -248,6 +248,7 @@ enum transaction_flags { TF_ROOT_OBJECT = 
0x04, /* contents are the component's root object */ TF_STATUS_CODE = 0x08, /* contents are a 32-bit status code */ TF_ACCEPT_FDS = 0x10, /* allow replies with file descriptors */ + TF_CLEAR_BUF = 0x20, /* clear buffer on txn complete */ }; struct binder_transaction_data { diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 52e8bcb339811c..cf7399f03b7123 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -213,7 +213,7 @@ struct cache_sb_disk { __le16 keys; }; __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ - __le16 bucket_size_hi; + __le16 obso_bucket_size_hi; /* obsoleted */ }; /* diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index 5ed721ad5b1985..af2a44c08683de 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -28,4 +28,9 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + #endif /* _UAPI_LINUX_CONST_H */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 5203f54a2be1c0..cf89c318f2ac90 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -322,7 +322,7 @@ enum devlink_reload_limit { DEVLINK_RELOAD_LIMIT_MAX = __DEVLINK_RELOAD_LIMIT_MAX - 1 }; -#define DEVLINK_RELOAD_LIMITS_VALID_MASK (BIT(__DEVLINK_RELOAD_LIMIT_MAX) - 1) +#define DEVLINK_RELOAD_LIMITS_VALID_MASK (_BITUL(__DEVLINK_RELOAD_LIMIT_MAX) - 1) enum devlink_attr { /* don't change the order or add anything between, this is ABI! 
*/ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9ca87bc73c447c..cde753bb209356 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -14,7 +14,7 @@ #ifndef _UAPI_LINUX_ETHTOOL_H #define _UAPI_LINUX_ETHTOOL_H -#include +#include #include #include diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index e5de6033693812..9f4428be3e3626 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -20,7 +20,6 @@ #define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32 0x10 -#define FSCRYPT_POLICY_FLAGS_VALID 0x1F /* Encryption algorithms */ #define FSCRYPT_MODE_AES_256_XTS 1 @@ -28,7 +27,7 @@ #define FSCRYPT_MODE_AES_128_CBC 5 #define FSCRYPT_MODE_AES_128_CTS 6 #define FSCRYPT_MODE_ADIANTUM 9 -#define __FSCRYPT_MODE_MAX 9 +/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */ /* * Legacy policy version; ad-hoc KDF and no key verification. @@ -177,7 +176,7 @@ struct fscrypt_get_key_status_arg { #define FS_POLICY_FLAGS_PAD_32 FSCRYPT_POLICY_FLAGS_PAD_32 #define FS_POLICY_FLAGS_PAD_MASK FSCRYPT_POLICY_FLAGS_PAD_MASK #define FS_POLICY_FLAG_DIRECT_KEY FSCRYPT_POLICY_FLAG_DIRECT_KEY -#define FS_POLICY_FLAGS_VALID FSCRYPT_POLICY_FLAGS_VALID +#define FS_POLICY_FLAGS_VALID 0x07 /* contains old flags only */ #define FS_ENCRYPTION_MODE_INVALID 0 /* never used */ #define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS #define FS_ENCRYPTION_MODE_AES_256_GCM 2 /* never used */ diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h index 60b7c2efd921c1..dc52a11ba6d158 100644 --- a/include/uapi/linux/if_alg.h +++ b/include/uapi/linux/if_alg.h @@ -24,6 +24,22 @@ struct sockaddr_alg { __u8 salg_name[64]; }; +/* + * Linux v4.12 and later removed the 64-byte limit on salg_name[]; it's now an + * arbitrary-length field. 
We had to keep the original struct above for source + * compatibility with existing userspace programs, though. Use the new struct + * below if support for very long algorithm names is needed. To do this, + * allocate 'sizeof(struct sockaddr_alg_new) + strlen(algname) + 1' bytes, and + * copy algname (including the null terminator) into salg_name. + */ +struct sockaddr_alg_new { + __u16 salg_family; + __u8 salg_type[14]; + __u32 salg_feat; + __u32 salg_mask; + __u8 salg_name[]; +}; + struct af_alg_iv { __u32 ivlen; __u8 iv[0]; diff --git a/include/uapi/linux/kernel.h b/include/uapi/linux/kernel.h index 0ff8f7477847ce..fadf2db71fe8a4 100644 --- a/include/uapi/linux/kernel.h +++ b/include/uapi/linux/kernel.h @@ -3,13 +3,6 @@ #define _UAPI_LINUX_KERNEL_H #include - -/* - * 'kernel.h' contains some often-used function prototypes etc - */ -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) -#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) - -#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#include #endif /* _UAPI_LINUX_KERNEL_H */ diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h index f9a1be7fc6962e..ead2e72e5c88ea 100644 --- a/include/uapi/linux/lightnvm.h +++ b/include/uapi/linux/lightnvm.h @@ -21,7 +21,7 @@ #define _UAPI_LINUX_LIGHTNVM_H #ifdef __KERNEL__ -#include +#include #include #else /* __KERNEL__ */ #include diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index c36177a86516ee..a1fd6173e2dbe9 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -2,7 +2,7 @@ #ifndef _UAPI__LINUX_MROUTE6_H #define _UAPI__LINUX_MROUTE6_H -#include +#include #include #include #include /* For struct sockaddr_in6. 
*/ diff --git a/include/uapi/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h index a8283f7dbc5191..b8c6bb233ac1c1 100644 --- a/include/uapi/linux/netfilter/x_tables.h +++ b/include/uapi/linux/netfilter/x_tables.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_X_TABLES_H #define _UAPI_X_TABLES_H -#include +#include #include #define XT_FUNCTION_MAXNAMELEN 30 diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index c3816ff7bfc32f..3d94269bbfa87c 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -2,7 +2,7 @@ #ifndef _UAPI__LINUX_NETLINK_H #define _UAPI__LINUX_NETLINK_H -#include +#include #include /* for __kernel_sa_family_t */ #include diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index a71b6e3b03ebc2..83ee45fa634b95 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -81,7 +81,8 @@ struct seccomp_metadata { struct ptrace_syscall_info { __u8 op; /* PTRACE_SYSCALL_INFO_* */ - __u32 arch __attribute__((__aligned__(sizeof(__u32)))); + __u8 pad[3]; + __u32 arch; __u64 instruction_pointer; __u64 stack_pointer; union { diff --git a/include/uapi/linux/rpl.h b/include/uapi/linux/rpl.h index 1dccb55cf8c647..708adddf9f1389 100644 --- a/include/uapi/linux/rpl.h +++ b/include/uapi/linux/rpl.h @@ -28,10 +28,10 @@ struct ipv6_rpl_sr_hdr { pad:4, reserved1:16; #elif defined(__BIG_ENDIAN_BITFIELD) - __u32 reserved:20, + __u32 cmpri:4, + cmpre:4, pad:4, - cmpri:4, - cmpre:4; + reserved:20; #else #error "Please fix " #endif diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 27c1ed2822e698..458179df9b2719 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -23,7 +23,7 @@ #ifndef _UAPI_LINUX_SYSCTL_H #define _UAPI_LINUX_SYSCTL_H -#include +#include #include #include diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index 
00850b98078a27..a38454d9e0f54e 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -176,7 +176,7 @@ struct v4l2_subdev_capability { }; /* The v4l2 sub-device video device node is registered in read-only mode. */ -#define V4L2_SUBDEV_CAP_RO_SUBDEV BIT(0) +#define V4L2_SUBDEV_CAP_RO_SUBDEV 0x00000001 /* Backwards compatibility define --- to be removed */ #define v4l2_subdev_edid v4l2_edid diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index f8b638c73371d8..901a4fd72c09f8 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -133,6 +133,13 @@ enum pvrdma_wc_flags { PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_NETWORK_HDR_TYPE, }; +enum pvrdma_network_type { + PVRDMA_NETWORK_IB, + PVRDMA_NETWORK_ROCE_V1 = PVRDMA_NETWORK_IB, + PVRDMA_NETWORK_IPV4, + PVRDMA_NETWORK_IPV6 +}; + struct pvrdma_alloc_ucontext_resp { __u32 qp_tab_size; __u32 reserved; diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 5a8315e6d8a607..2c43b0ef1e4d50 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -61,6 +61,15 @@ struct xenbus_watch /* Path being watched. */ const char *node; + unsigned int nr_pending; + + /* + * Called just before enqueuing new event while a spinlock is held. + * The event will be discarded if this callback returns false. + */ + bool (*will_handle)(struct xenbus_watch *, + const char *path, const char *token); + /* Callback (executed in a process context with no locks held). 
*/ void (*callback)(struct xenbus_watch *, const char *path, const char *token); @@ -183,7 +192,7 @@ void xs_suspend_cancel(void); struct work_struct; -void xenbus_probe(struct work_struct *); +void xenbus_probe(void); #define XENBUS_IS_ERR_READ(str) ({ \ if (!IS_ERR(str) && strlen(str) == 0) { \ @@ -197,10 +206,14 @@ void xenbus_probe(struct work_struct *); int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *)); -__printf(4, 5) +__printf(5, 6) int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *), const char *pathfmt, ...); diff --git a/init/init_task.c b/init/init_task.c index a56f0abb63e934..15f6eb93a04fa2 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -26,7 +26,7 @@ static struct signal_struct init_signals = { .multiprocess = HLIST_HEAD_INIT, .rlim = INIT_RLIMITS, .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), - .exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex), + .exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock), #ifdef CONFIG_POSIX_TIMERS .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), .cputimer = { diff --git a/init/main.c b/init/main.c index 32b2a8affafd1b..9d964511fe0c2d 100644 --- a/init/main.c +++ b/init/main.c @@ -1512,6 +1512,7 @@ static noinline void __init kernel_init_freeable(void) init_mm_internals(); + rcu_init_tasks_generic(); do_pre_smp_initcalls(); lockup_detector_init(); diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index bfcfcd61adb64a..5b3f01da172bce 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -154,7 +154,7 @@ static void audit_autoremove_mark_rule(struct 
audit_fsnotify_mark *audit_mark) /* Update mark data in audit rules based on fsnotify events. */ static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *dname) + const struct qstr *dname, u32 cookie) { struct audit_fsnotify_mark *audit_mark; diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 83e1c07fc99e1e..6c91902f4f455a 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -1037,7 +1037,7 @@ static void evict_chunk(struct audit_chunk *chunk) static int audit_tree_handle_event(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *file_name) + const struct qstr *file_name, u32 cookie) { return 0; } diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 246e5ba704c006..2acf7ca4915421 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -466,7 +466,7 @@ void audit_remove_watch_rule(struct audit_krule *krule) /* Update watch data in audit rules based on fsnotify events. */ static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *dname) + const struct qstr *dname, u32 cookie) { struct audit_parent *parent; diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 6edff97ad594bd..dbc1dbdd2cbf0f 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -176,7 +176,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, * bpf_local_storage_update expects the owner to have a * valid storage pointer. 
*/ - if (!inode_storage_ptr(inode)) + if (!inode || !inode_storage_ptr(inode)) return (unsigned long)NULL; sdata = inode_storage_lookup(inode, map, true); @@ -200,6 +200,9 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, BPF_CALL_2(bpf_inode_storage_delete, struct bpf_map *, map, struct inode *, inode) { + if (!inode) + return -EINVAL; + /* This helper must only called from where the inode is gurranteed * to have a refcount and cannot be freed. */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 6ec088a96302f9..96555a8a2c5454 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1391,12 +1391,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, if (ctx.optlen != 0) { *optlen = ctx.optlen; *kernel_optval = ctx.optval; + /* export and don't free sockopt buf */ + return 0; } } out: - if (ret) - sockopt_free_buf(&ctx); + sockopt_free_buf(&ctx); return ret; } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index deda1185237b81..c489430cac78ca 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -108,7 +108,7 @@ BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) } const struct bpf_func_proto bpf_map_peek_elem_proto = { - .func = bpf_map_pop_elem, + .func = bpf_map_peek_elem, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8f50c9c19f1b04..9433ab9995cd73 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2717,7 +2717,6 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, out_put_prog: if (tgt_prog_fd && tgt_prog) bpf_prog_put(tgt_prog); - bpf_prog_put(prog); return err; } @@ -2830,7 +2829,10 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) tp_name = prog->aux->attach_func_name; break; } - return bpf_tracing_prog_attach(prog, 0, 0); + err = bpf_tracing_prog_attach(prog, 0, 0); + if (err >= 0) + return err; + goto out_put_prog; case 
BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: if (strncpy_from_user(buf, diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 5b6af30bfbcd88..f3d3a562a802a6 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -136,8 +136,7 @@ struct bpf_iter_seq_task_file_info { }; static struct file * -task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, - struct task_struct **task, struct files_struct **fstruct) +task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) { struct pid_namespace *ns = info->common.ns; u32 curr_tid = info->tid, max_fds; @@ -150,26 +149,29 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, * Otherwise, it does not hold any reference. */ again: - if (*task) { - curr_task = *task; - curr_files = *fstruct; + if (info->task) { + curr_task = info->task; + curr_files = info->files; curr_fd = info->fd; } else { curr_task = task_seq_get_next(ns, &curr_tid, true); - if (!curr_task) + if (!curr_task) { + info->task = NULL; + info->files = NULL; + info->tid = curr_tid; return NULL; + } curr_files = get_files_struct(curr_task); if (!curr_files) { put_task_struct(curr_task); - curr_tid = ++(info->tid); + curr_tid = curr_tid + 1; info->fd = 0; goto again; } - /* set *fstruct, *task and info->tid */ - *fstruct = curr_files; - *task = curr_task; + info->files = curr_files; + info->task = curr_task; if (curr_tid == info->tid) { curr_fd = info->fd; } else { @@ -199,8 +201,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, rcu_read_unlock(); put_files_struct(curr_files); put_task_struct(curr_task); - *task = NULL; - *fstruct = NULL; + info->task = NULL; + info->files = NULL; info->fd = 0; curr_tid = ++(info->tid); goto again; @@ -209,21 +211,13 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) { struct bpf_iter_seq_task_file_info *info = seq->private; - struct files_struct 
*files = NULL; - struct task_struct *task = NULL; struct file *file; - file = task_file_seq_get_next(info, &task, &files); - if (!file) { - info->files = NULL; - info->task = NULL; - return NULL; - } - - if (*pos == 0) + info->task = NULL; + info->files = NULL; + file = task_file_seq_get_next(info); + if (file && *pos == 0) ++*pos; - info->task = task; - info->files = files; return file; } @@ -231,24 +225,11 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bpf_iter_seq_task_file_info *info = seq->private; - struct files_struct *files = info->files; - struct task_struct *task = info->task; - struct file *file; ++*pos; ++info->fd; fput((struct file *)v); - file = task_file_seq_get_next(info, &task, &files); - if (!file) { - info->files = NULL; - info->task = NULL; - return NULL; - } - - info->task = task; - info->files = files; - - return file; + return task_file_seq_get_next(info); } struct bpf_iter__task_file { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 53fe6ef6d931f5..618cb1b451adeb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2214,6 +2214,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_RDWR_BUF: case PTR_TO_RDWR_BUF_OR_NULL: case PTR_TO_PERCPU_BTF_ID: + case PTR_TO_MEM: + case PTR_TO_MEM_OR_NULL: return true; default: return false; @@ -5255,7 +5257,7 @@ static bool signed_add_overflows(s64 a, s64 b) return res < a; } -static bool signed_add32_overflows(s64 a, s64 b) +static bool signed_add32_overflows(s32 a, s32 b) { /* Do the add in u32, where overflow is well-defined */ s32 res = (s32)((u32)a + (u32)b); @@ -5265,7 +5267,7 @@ static bool signed_add32_overflows(s64 a, s64 b) return res < a; } -static bool signed_sub_overflows(s32 a, s32 b) +static bool signed_sub_overflows(s64 a, s64 b) { /* Do the sub in u64, where overflow is well-defined */ s64 res = (s64)((u64)a - (u64)b); @@ -5277,7 
+5279,7 @@ static bool signed_sub_overflows(s32 a, s32 b) static bool signed_sub32_overflows(s32 a, s32 b) { - /* Do the sub in u64, where overflow is well-defined */ + /* Do the sub in u32, where overflow is well-defined */ s32 res = (s32)((u32)a - (u32)b); if (b < 0) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 191c329e482ad9..32596fdbcd5b8e 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -908,6 +908,8 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) opt = fs_parse(fc, cgroup1_fs_parameters, param, &result); if (opt == -ENOPARAM) { if (strcmp(param->key, "source") == 0) { + if (fc->source) + return invalf(fc, "Multiple sources not supported"); fc->source = param->string; param->string = NULL; return 0; diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 57b5b5d0a5fdd9..53c70c470a38d7 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -983,25 +983,48 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], */ static void rebuild_sched_domains_locked(void) { + struct cgroup_subsys_state *pos_css; struct sched_domain_attr *attr; cpumask_var_t *doms; + struct cpuset *cs; int ndoms; lockdep_assert_cpus_held(); percpu_rwsem_assert_held(&cpuset_rwsem); /* - * We have raced with CPU hotplug. Don't do anything to avoid + * If we have raced with CPU hotplug, return early to avoid * passing doms with offlined cpu to partition_sched_domains(). - * Anyways, hotplug work item will rebuild sched domains. + * Anyways, cpuset_hotplug_workfn() will rebuild sched domains. + * + * With no CPUs in any subpartitions, top_cpuset's effective CPUs + * should be the same as the active CPUs, so checking only top_cpuset + * is enough to detect racing CPU offlines. 
*/ if (!top_cpuset.nr_subparts_cpus && !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) return; - if (top_cpuset.nr_subparts_cpus && - !cpumask_subset(top_cpuset.effective_cpus, cpu_active_mask)) - return; + /* + * With subpartition CPUs, however, the effective CPUs of a partition + * root should be only a subset of the active CPUs. Since a CPU in any + * partition root could be offlined, all must be checked. + */ + if (top_cpuset.nr_subparts_cpus) { + rcu_read_lock(); + cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { + if (!is_partition_root(cs)) { + pos_css = css_rightmost_descendant(pos_css); + continue; + } + if (!cpumask_subset(cs->effective_cpus, + cpu_active_mask)) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + } /* Generate domain masks and attrs */ ndoms = generate_sched_domains(&doms, &attr); diff --git a/kernel/events/core.c b/kernel/events/core.c index dc568ca295bdc1..c3ba29d058b731 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1325,7 +1325,7 @@ static void put_ctx(struct perf_event_context *ctx) * function. * * Lock order: - * exec_update_mutex + * exec_update_lock * task_struct::perf_event_mutex * perf_event_context::mutex * perf_event::child_mutex; @@ -11720,24 +11720,6 @@ SYSCALL_DEFINE5(perf_event_open, goto err_task; } - if (task) { - err = mutex_lock_interruptible(&task->signal->exec_update_mutex); - if (err) - goto err_task; - - /* - * Preserve ptrace permission check for backwards compatibility. - * - * We must hold exec_update_mutex across this and any potential - * perf_install_in_context() call for this new event to - * serialize against exec() altering our credentials (and the - * perf_event_exit_task() that could imply). 
- */ - err = -EACCES; - if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) - goto err_cred; - } - if (flags & PERF_FLAG_PID_CGROUP) cgroup_fd = pid; @@ -11745,7 +11727,7 @@ SYSCALL_DEFINE5(perf_event_open, NULL, NULL, cgroup_fd); if (IS_ERR(event)) { err = PTR_ERR(event); - goto err_cred; + goto err_task; } if (is_sampling_event(event)) { @@ -11864,6 +11846,24 @@ SYSCALL_DEFINE5(perf_event_open, goto err_context; } + if (task) { + err = down_read_interruptible(&task->signal->exec_update_lock); + if (err) + goto err_file; + + /* + * Preserve ptrace permission check for backwards compatibility. + * + * We must hold exec_update_lock across this and any potential + * perf_install_in_context() call for this new event to + * serialize against exec() altering our credentials (and the + * perf_event_exit_task() that could imply). + */ + err = -EACCES; + if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) + goto err_cred; + } + if (move_group) { gctx = __perf_event_ctx_lock_double(group_leader, ctx); @@ -12017,7 +12017,7 @@ SYSCALL_DEFINE5(perf_event_open, mutex_unlock(&ctx->mutex); if (task) { - mutex_unlock(&task->signal->exec_update_mutex); + up_read(&task->signal->exec_update_lock); put_task_struct(task); } @@ -12039,7 +12039,10 @@ SYSCALL_DEFINE5(perf_event_open, if (move_group) perf_event_ctx_unlock(group_leader, gctx); mutex_unlock(&ctx->mutex); -/* err_file: */ +err_cred: + if (task) + up_read(&task->signal->exec_update_lock); +err_file: fput(event_file); err_context: perf_unpin_context(ctx); @@ -12051,9 +12054,6 @@ SYSCALL_DEFINE5(perf_event_open, */ if (!event_file) free_event(event); -err_cred: - if (task) - mutex_unlock(&task->signal->exec_update_mutex); err_task: if (task) put_task_struct(task); @@ -12358,7 +12358,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) /* * When a child task exits, feed back event values to parent events. 
* - * Can be called with exec_update_mutex held when called from + * Can be called with exec_update_lock held when called from * setup_new_exec(). */ void perf_event_exit_task(struct task_struct *child) diff --git a/kernel/exit.c b/kernel/exit.c index 1f236ed375f83c..d13d67fc5f4e20 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -762,6 +763,7 @@ void __noreturn do_exit(long code) schedule(); } + io_uring_files_cancel(tsk->files); exit_signals(tsk); /* sets PF_EXITING */ /* sync mm's RSS info before statistics gathering */ diff --git a/kernel/fork.c b/kernel/fork.c index 6d266388d3804c..c675fdbd3dce13 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1007,6 +1007,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm->vmacache_seqnum = 0; atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); + seqcount_init(&mm->write_protect_seq); mmap_init_lock(mm); INIT_LIST_HEAD(&mm->mmlist); mm->core_state = NULL; @@ -1221,7 +1222,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) struct mm_struct *mm; int err; - err = mutex_lock_killable(&task->signal->exec_update_mutex); + err = down_read_killable(&task->signal->exec_update_lock); if (err) return ERR_PTR(err); @@ -1231,7 +1232,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) mmput(mm); mm = ERR_PTR(-EACCES); } - mutex_unlock(&task->signal->exec_update_mutex); + up_read(&task->signal->exec_update_lock); return mm; } @@ -1591,7 +1592,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_score_adj_min = current->signal->oom_score_adj_min; mutex_init(&sig->cred_guard_mutex); - mutex_init(&sig->exec_update_mutex); + init_rwsem(&sig->exec_update_lock); return 0; } diff --git a/kernel/futex.c b/kernel/futex.c index 00259c7e288eea..0693b3ea0f9a44 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -765,6 +765,29 @@ static struct 
futex_pi_state *alloc_pi_state(void) return pi_state; } +static void pi_state_update_owner(struct futex_pi_state *pi_state, + struct task_struct *new_owner) +{ + struct task_struct *old_owner = pi_state->owner; + + lockdep_assert_held(&pi_state->pi_mutex.wait_lock); + + if (old_owner) { + raw_spin_lock(&old_owner->pi_lock); + WARN_ON(list_empty(&pi_state->list)); + list_del_init(&pi_state->list); + raw_spin_unlock(&old_owner->pi_lock); + } + + if (new_owner) { + raw_spin_lock(&new_owner->pi_lock); + WARN_ON(!list_empty(&pi_state->list)); + list_add(&pi_state->list, &new_owner->pi_state_list); + pi_state->owner = new_owner; + raw_spin_unlock(&new_owner->pi_lock); + } +} + static void get_pi_state(struct futex_pi_state *pi_state) { WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount)); @@ -787,17 +810,11 @@ static void put_pi_state(struct futex_pi_state *pi_state) * and has cleaned up the pi_state already */ if (pi_state->owner) { - struct task_struct *owner; unsigned long flags; raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); - owner = pi_state->owner; - if (owner) { - raw_spin_lock(&owner->pi_lock); - list_del_init(&pi_state->list); - raw_spin_unlock(&owner->pi_lock); - } - rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); + pi_state_update_owner(pi_state, NULL); + rt_mutex_proxy_unlock(&pi_state->pi_mutex); raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } @@ -943,7 +960,8 @@ static inline void exit_pi_state_list(struct task_struct *curr) { } * FUTEX_OWNER_DIED bit. See [4] * * [10] There is no transient state which leaves owner and user space - * TID out of sync. + * TID out of sync. Except one error case where the kernel is denied + * write access to the user address, see fixup_pi_state_owner(). 
* * * Serialization and lifetime rules: @@ -1523,26 +1541,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ ret = -EINVAL; } - if (ret) - goto out_unlock; - - /* - * This is a point of no return; once we modify the uval there is no - * going back and subsequent operations must not fail. - */ - - raw_spin_lock(&pi_state->owner->pi_lock); - WARN_ON(list_empty(&pi_state->list)); - list_del_init(&pi_state->list); - raw_spin_unlock(&pi_state->owner->pi_lock); - - raw_spin_lock(&new_owner->pi_lock); - WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &new_owner->pi_state_list); - pi_state->owner = new_owner; - raw_spin_unlock(&new_owner->pi_lock); - - postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); + if (!ret) { + /* + * This is a point of no return; once we modified the uval + * there is no going back and subsequent operations must + * not fail. + */ + pi_state_update_owner(pi_state, new_owner); + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); + } out_unlock: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); @@ -2325,18 +2332,13 @@ static void unqueue_me_pi(struct futex_q *q) spin_unlock(q->lock_ptr); } -static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *argowner) +static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, + struct task_struct *argowner) { struct futex_pi_state *pi_state = q->pi_state; - u32 uval, curval, newval; struct task_struct *oldowner, *newowner; - u32 newtid; - int ret, err = 0; - - lockdep_assert_held(q->lock_ptr); - - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + u32 uval, curval, newval, newtid; + int err = 0; oldowner = pi_state->owner; @@ -2370,14 +2372,12 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, * We raced against a concurrent self; things are * already fixed up. Nothing to do. 
*/ - ret = 0; - goto out_unlock; + return 0; } if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { - /* We got the lock after all, nothing to fix. */ - ret = 0; - goto out_unlock; + /* We got the lock. pi_state is correct. Tell caller. */ + return 1; } /* @@ -2404,8 +2404,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, * We raced against a concurrent self; things are * already fixed up. Nothing to do. */ - ret = 0; - goto out_unlock; + return 1; } newowner = argowner; } @@ -2435,22 +2434,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, * We fixed up user space. Now we need to fix the pi_state * itself. */ - if (pi_state->owner != NULL) { - raw_spin_lock(&pi_state->owner->pi_lock); - WARN_ON(list_empty(&pi_state->list)); - list_del_init(&pi_state->list); - raw_spin_unlock(&pi_state->owner->pi_lock); - } + pi_state_update_owner(pi_state, newowner); - pi_state->owner = newowner; - - raw_spin_lock(&newowner->pi_lock); - WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &newowner->pi_state_list); - raw_spin_unlock(&newowner->pi_lock); - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - - return 0; + return argowner == current; /* * In order to reschedule or handle a page fault, we need to drop the @@ -2471,17 +2457,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, switch (err) { case -EFAULT: - ret = fault_in_user_writeable(uaddr); + err = fault_in_user_writeable(uaddr); break; case -EAGAIN: cond_resched(); - ret = 0; + err = 0; break; default: WARN_ON_ONCE(1); - ret = err; break; } @@ -2491,17 +2476,44 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, /* * Check if someone else fixed it for us: */ - if (pi_state->owner != oldowner) { - ret = 0; - goto out_unlock; - } + if (pi_state->owner != oldowner) + return argowner == current; - if (ret) - goto out_unlock; + /* Retry if err was -EAGAIN or the fault in succeeded */ + if (!err) + goto retry; - 
goto retry; + /* + * fault_in_user_writeable() failed so user state is immutable. At + * best we can make the kernel state consistent but user state will + * be most likely hosed and any subsequent unlock operation will be + * rejected due to PI futex rule [10]. + * + * Ensure that the rtmutex owner is also the pi_state owner despite + * the user space value claiming something different. There is no + * point in unlocking the rtmutex if current is the owner as it + * would need to wait until the next waiter has taken the rtmutex + * to guarantee consistent state. Keep it simple. Userspace asked + * for this wreckaged state. + * + * The rtmutex has an owner - either current or some other + * task. See the EAGAIN loop above. + */ + pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); -out_unlock: + return err; +} + +static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, + struct task_struct *argowner) +{ + struct futex_pi_state *pi_state = q->pi_state; + int ret; + + lockdep_assert_held(q->lock_ptr); + + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + ret = __fixup_pi_state_owner(uaddr, q, argowner); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); return ret; } @@ -2525,8 +2537,6 @@ static long futex_wait_restart(struct restart_block *restart); */ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) { - int ret = 0; - if (locked) { /* * Got the lock. We might not be the anticipated owner if we @@ -2537,8 +2547,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * stable state, anything else needs more attention. */ if (q->pi_state->owner != current) - ret = fixup_pi_state_owner(uaddr, q, current); - return ret ? ret : locked; + return fixup_pi_state_owner(uaddr, q, current); + return 1; } /* @@ -2549,23 +2559,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * Another speculative read; pi_state->owner == current is unstable * but needs our attention. 
*/ - if (q->pi_state->owner == current) { - ret = fixup_pi_state_owner(uaddr, q, NULL); - return ret; - } + if (q->pi_state->owner == current) + return fixup_pi_state_owner(uaddr, q, NULL); /* * Paranoia check. If we did not take the lock, then we should not be - * the owner of the rt_mutex. + * the owner of the rt_mutex. Warn and establish consistent state. */ - if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) { - printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " - "pi-state %p\n", ret, - q->pi_state->pi_mutex.owner, - q->pi_state->owner); - } + if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) + return fixup_pi_state_owner(uaddr, q, current); - return ret; + return 0; } /** @@ -2773,7 +2777,6 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to; - struct futex_pi_state *pi_state = NULL; struct task_struct *exiting = NULL; struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; @@ -2909,23 +2912,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, if (res) ret = (res < 0) ? res : 0; - /* - * If fixup_owner() faulted and was unable to handle the fault, unlock - * it and return the fault to userspace. 
- */ - if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) { - pi_state = q.pi_state; - get_pi_state(pi_state); - } - /* Unqueue and drop the lock */ unqueue_me_pi(&q); - - if (pi_state) { - rt_mutex_futex_unlock(&pi_state->pi_mutex); - put_pi_state(pi_state); - } - goto out; out_unlock_put_key: @@ -3185,7 +3173,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 __user *uaddr2) { struct hrtimer_sleeper timeout, *to; - struct futex_pi_state *pi_state = NULL; struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; @@ -3263,16 +3250,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { - pi_state = q.pi_state; - get_pi_state(pi_state); - } /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. */ put_pi_state(q.pi_state); spin_unlock(q.lock_ptr); + /* + * Adjust the return value. It's either -EFAULT or + * success (1) but the caller expects 0 for success. + */ + ret = ret < 0 ? ret : 0; } } else { struct rt_mutex *pi_mutex; @@ -3303,25 +3291,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (res) ret = (res < 0) ? res : 0; - /* - * If fixup_pi_state_owner() faulted and was unable to handle - * the fault, unlock the rt_mutex and return the fault to - * userspace. - */ - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { - pi_state = q.pi_state; - get_pi_state(pi_state); - } - /* Unqueue and drop the lock. 
*/ unqueue_me_pi(&q); } - if (pi_state) { - rt_mutex_futex_unlock(&pi_state->pi_mutex); - put_pi_state(pi_state); - } - if (ret == -EINTR) { /* * We've already been requeued, but cannot restart by calling diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index e4ca69608f3b86..c6b419db68efc5 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1373,8 +1373,15 @@ static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs) { - if (domain->ops->free) - domain->ops->free(domain, irq_base, nr_irqs); + unsigned int i; + + if (!domain->ops->free) + return; + + for (i = 0; i < nr_irqs; i++) { + if (irq_domain_get_irq_data(domain, irq_base + i)) + domain->ops->free(domain, irq_base + i, 1); + } } int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, diff --git a/kernel/kcmp.c b/kernel/kcmp.c index b3ff9288c6cc91..c0d2ad9b4705d8 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -75,25 +75,25 @@ get_file_raw_ptr(struct task_struct *task, unsigned int idx) return file; } -static void kcmp_unlock(struct mutex *m1, struct mutex *m2) +static void kcmp_unlock(struct rw_semaphore *l1, struct rw_semaphore *l2) { - if (likely(m2 != m1)) - mutex_unlock(m2); - mutex_unlock(m1); + if (likely(l2 != l1)) + up_read(l2); + up_read(l1); } -static int kcmp_lock(struct mutex *m1, struct mutex *m2) +static int kcmp_lock(struct rw_semaphore *l1, struct rw_semaphore *l2) { int err; - if (m2 > m1) - swap(m1, m2); + if (l2 > l1) + swap(l1, l2); - err = mutex_lock_killable(m1); - if (!err && likely(m1 != m2)) { - err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING); + err = down_read_killable(l1); + if (!err && likely(l1 != l2)) { + err = down_read_killable_nested(l2, SINGLE_DEPTH_NESTING); if (err) - mutex_unlock(m1); + up_read(l1); } return err; @@ -173,8 +173,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, /* * One should have enough rights to inspect task details. 
*/ - ret = kcmp_lock(&task1->signal->exec_update_mutex, - &task2->signal->exec_update_mutex); + ret = kcmp_lock(&task1->signal->exec_update_lock, + &task2->signal->exec_update_lock); if (ret) goto err; if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) || @@ -229,8 +229,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, } err_unlock: - kcmp_unlock(&task1->signal->exec_update_mutex, - &task2->signal->exec_update_mutex); + kcmp_unlock(&task1->signal->exec_update_lock, + &task2->signal->exec_update_lock); err: put_task_struct(task1); put_task_struct(task2); diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 8798a8183974e3..c589c7a9562ca6 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1135,7 +1135,6 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { - lock_system_sleep(); pm_prepare_console(); error = freeze_processes(); if (error) { @@ -1198,7 +1197,6 @@ int kernel_kexec(void) thaw_processes(); Restore_console: pm_restore_console(); - unlock_system_sleep(); } #endif diff --git a/kernel/kthread.c b/kernel/kthread.c index 933a625621b8db..5edf7e19ab2626 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -493,11 +493,36 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), return p; kthread_bind(p, cpu); /* CPU hotplug need to bind once again when unparking the thread. 
*/ - set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); to_kthread(p)->cpu = cpu; return p; } +void kthread_set_per_cpu(struct task_struct *k, int cpu) +{ + struct kthread *kthread = to_kthread(k); + if (!kthread) + return; + + WARN_ON_ONCE(!(k->flags & PF_NO_SETAFFINITY)); + + if (cpu < 0) { + clear_bit(KTHREAD_IS_PER_CPU, &kthread->flags); + return; + } + + kthread->cpu = cpu; + set_bit(KTHREAD_IS_PER_CPU, &kthread->flags); +} + +bool kthread_is_per_cpu(struct task_struct *k) +{ + struct kthread *kthread = to_kthread(k); + if (!kthread) + return false; + + return test_bit(KTHREAD_IS_PER_CPU, &kthread->flags); +} + /** * kthread_unpark - unpark a thread created by kthread_create(). * @k: thread created by kthread_create(). diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c1418b47f625a2..bdaf4829098c02 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -79,7 +79,7 @@ module_param(lock_stat, int, 0644); DEFINE_PER_CPU(unsigned int, lockdep_recursion); EXPORT_PER_CPU_SYMBOL_GPL(lockdep_recursion); -static inline bool lockdep_enabled(void) +static __always_inline bool lockdep_enabled(void) { if (!debug_locks) return false; @@ -5271,12 +5271,15 @@ static void __lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie /* * Check whether we follow the irq-flags state precisely: */ -static void check_flags(unsigned long flags) +static noinstr void check_flags(unsigned long flags) { #if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) if (!debug_locks) return; + /* Get the warning out.. 
*/ + instrumentation_begin(); + if (irqs_disabled_flags(flags)) { if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())) { printk("possible reason: unannotated irqs-off.\n"); @@ -5304,6 +5307,8 @@ static void check_flags(unsigned long flags) if (!debug_locks) print_irqtrace_events(current); + + instrumentation_end(); #endif } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index cfdd5b93264d7e..2f8cd616d3b29a 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1716,8 +1716,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, * possible because it belongs to the pi_state which is about to be freed * and it is not longer visible to other tasks. */ -void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner) +void rt_mutex_proxy_unlock(struct rt_mutex *lock) { debug_rt_mutex_proxy_unlock(lock); rt_mutex_set_owner(lock, NULL); diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index d1d62f942be228..ca6fb489007b6b 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -133,8 +133,7 @@ enum rtmutex_chainwalk { extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner); -extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner); +extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index f11b9bd3431d28..a163542d178ee1 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -1345,6 +1345,18 @@ static inline void __down_read(struct rw_semaphore *sem) } } +static inline int __down_read_interruptible(struct rw_semaphore *sem) +{ + if (!rwsem_read_trylock(sem)) { + if 
(IS_ERR(rwsem_down_read_slowpath(sem, TASK_INTERRUPTIBLE))) + return -EINTR; + DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); + } else { + rwsem_set_reader_owned(sem); + } + return 0; +} + static inline int __down_read_killable(struct rw_semaphore *sem) { if (!rwsem_read_trylock(sem)) { @@ -1495,6 +1507,20 @@ void __sched down_read(struct rw_semaphore *sem) } EXPORT_SYMBOL(down_read); +int __sched down_read_interruptible(struct rw_semaphore *sem) +{ + might_sleep(); + rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); + + if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) { + rwsem_release(&sem->dep_map, _RET_IP_); + return -EINTR; + } + + return 0; +} +EXPORT_SYMBOL(down_read_interruptible); + int __sched down_read_killable(struct rw_semaphore *sem) { might_sleep(); @@ -1605,6 +1631,20 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) } EXPORT_SYMBOL(down_read_nested); +int down_read_killable_nested(struct rw_semaphore *sem, int subclass) +{ + might_sleep(); + rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); + + if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { + rwsem_release(&sem->dep_map, _RET_IP_); + return -EINTR; + } + + return 0; +} +EXPORT_SYMBOL(down_read_killable_nested); + void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) { might_sleep(); diff --git a/kernel/module.c b/kernel/module.c index a4fa44a652a757..e20499309b2af6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1895,7 +1895,6 @@ static int mod_sysfs_init(struct module *mod) if (err) mod_kobject_put(mod); - /* delay uevent until full sysfs population */ out: return err; } @@ -1932,7 +1931,6 @@ static int mod_sysfs_setup(struct module *mod, add_sect_attrs(mod, info); add_notes_attrs(mod, info); - kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); return 0; out_unreg_modinfo_attrs: @@ -3639,6 +3637,9 @@ static noinline int do_init_module(struct module *mod) 
blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_LIVE, mod); + /* Delay uevent until module has finished its init routine */ + kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); + /* * We need to finish all async code before the module init sequence * is done. This has potential to deadlock. For example, a newly @@ -3991,6 +3992,7 @@ static int load_module(struct load_info *info, const char __user *uargs, MODULE_STATE_GOING, mod); klp_module_going(mod); bug_cleanup: + mod->state = MODULE_STATE_GOING; /* module_bug_cleanup needs module_mutex protection */ mutex_lock(&module_mutex); module_bug_cleanup(mod); diff --git a/kernel/pid.c b/kernel/pid.c index a96bc4bf4f8698..4856818c9de1ae 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -628,7 +628,7 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd) struct file *file; int ret; - ret = mutex_lock_killable(&task->signal->exec_update_mutex); + ret = down_read_killable(&task->signal->exec_update_lock); if (ret) return ERR_PTR(ret); @@ -637,7 +637,7 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd) else file = ERR_PTR(-EPERM); - mutex_unlock(&task->signal->exec_update_mutex); + up_read(&task->signal->exec_update_lock); return file ?: ERR_PTR(-EBADF); } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index c73f2e295167d1..72e33054a2e1b7 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -497,10 +497,10 @@ static int swap_writer_finish(struct swap_map_handle *handle, unsigned int flags, int error) { if (!error) { - flush_swap_writer(handle); pr_info("S"); error = mark_swapfiles(handle, flags); pr_cont("|\n"); + flush_swap_writer(handle); } if (error) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index bc1e3b5a97bddb..aafec8cb8637df 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1338,11 +1338,16 @@ static size_t info_print_prefix(const struct printk_info *info, bool syslog, * done: * * - Add prefix for each line. 
+ * - Drop truncated lines that no longer fit into the buffer. * - Add the trailing newline that has been removed in vprintk_store(). - * - Drop truncated lines that do not longer fit into the buffer. + * - Add a string terminator. + * + * Since the produced string is always terminated, the maximum possible + * return value is @r->text_buf_size - 1; * * Return: The length of the updated/prepared text, including the added - * prefixes and the newline. The dropped line(s) are not counted. + * prefixes and the newline. The terminator is not counted. The dropped + * line(s) are not counted. */ static size_t record_print_text(struct printk_record *r, bool syslog, bool time) @@ -1385,26 +1390,31 @@ static size_t record_print_text(struct printk_record *r, bool syslog, /* * Truncate the text if there is not enough space to add the - * prefix and a trailing newline. + * prefix and a trailing newline and a terminator. */ - if (len + prefix_len + text_len + 1 > buf_size) { + if (len + prefix_len + text_len + 1 + 1 > buf_size) { /* Drop even the current line if no space. */ - if (len + prefix_len + line_len + 1 > buf_size) + if (len + prefix_len + line_len + 1 + 1 > buf_size) break; - text_len = buf_size - len - prefix_len - 1; + text_len = buf_size - len - prefix_len - 1 - 1; truncated = true; } memmove(text + prefix_len, text, text_len); memcpy(text, prefix, prefix_len); + /* + * Increment the prepared length to include the text and + * prefix that were just moved+copied. Also increment for the + * newline at the end of this line. If this is the last line, + * there is no newline, but it will be added immediately below. + */ len += prefix_len + line_len + 1; - if (text_len == line_len) { /* - * Add the trailing newline removed in - * vprintk_store(). + * This is the last line. Add the trailing newline + * removed in vprintk_store(). 
*/ text[prefix_len + line_len] = '\n'; break; @@ -1429,6 +1439,14 @@ static size_t record_print_text(struct printk_record *r, bool syslog, text_len -= line_len + 1; } + /* + * If a buffer was provided, it will be terminated. Space for the + * string terminator is guaranteed to be available. The terminator is + * not counted in the return value. + */ + if (buf_size > 0) + r->text_buf[len] = 0; + return len; } @@ -3376,7 +3394,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, while (prb_read_valid_info(prb, seq, &info, &line_count)) { if (r.info->seq >= dumper->next_seq) break; - l += get_record_print_text_size(&info, line_count, true, time); + l += get_record_print_text_size(&info, line_count, syslog, time); seq = r.info->seq + 1; } @@ -3386,7 +3404,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, &info, &line_count)) { if (r.info->seq >= dumper->next_seq) break; - l -= get_record_print_text_size(&info, line_count, true, time); + l -= get_record_print_text_size(&info, line_count, syslog, time); seq = r.info->seq + 1; } diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index 74e25a1704f2b7..617dd635896505 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -1720,7 +1720,7 @@ static bool copy_data(struct prb_data_ring *data_ring, /* Caller interested in the line count? */ if (line_count) - *line_count = count_lines(data, data_size); + *line_count = count_lines(data, len); /* Caller interested in the data content? */ if (!buf || !buf_size) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index d5d9f2d03e8a0a..73bbe792fe1e8a 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -241,7 +241,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) } } -/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */ +/* Spawn RCU-tasks grace-period kthread. 
*/ static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp) { struct task_struct *t; @@ -569,7 +569,6 @@ static int __init rcu_spawn_tasks_kthread(void) rcu_spawn_tasks_kthread_generic(&rcu_tasks); return 0; } -core_initcall(rcu_spawn_tasks_kthread); #ifndef CONFIG_TINY_RCU static void show_rcu_tasks_classic_gp_kthread(void) @@ -697,7 +696,6 @@ static int __init rcu_spawn_tasks_rude_kthread(void) rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude); return 0; } -core_initcall(rcu_spawn_tasks_rude_kthread); #ifndef CONFIG_TINY_RCU static void show_rcu_tasks_rude_gp_kthread(void) @@ -975,6 +973,11 @@ static void rcu_tasks_trace_pregp_step(void) static void rcu_tasks_trace_pertask(struct task_struct *t, struct list_head *hop) { + // During early boot when there is only the one boot CPU, there + // is no idle task for the other CPUs. Just return. + if (unlikely(t == NULL)) + return; + WRITE_ONCE(t->trc_reader_special.b.need_qs, false); WRITE_ONCE(t->trc_reader_checked, false); t->trc_ipi_to_cpu = -1; @@ -1200,7 +1203,6 @@ static int __init rcu_spawn_tasks_trace_kthread(void) rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace); return 0; } -core_initcall(rcu_spawn_tasks_trace_kthread); #ifndef CONFIG_TINY_RCU static void show_rcu_tasks_trace_gp_kthread(void) @@ -1229,6 +1231,21 @@ void show_rcu_tasks_gp_kthreads(void) } #endif /* #ifndef CONFIG_TINY_RCU */ +void __init rcu_init_tasks_generic(void) +{ +#ifdef CONFIG_TASKS_RCU + rcu_spawn_tasks_kthread(); +#endif + +#ifdef CONFIG_TASKS_RUDE_RCU + rcu_spawn_tasks_rude_kthread(); +#endif + +#ifdef CONFIG_TASKS_TRACE_RCU + rcu_spawn_tasks_trace_kthread(); +#endif +} + #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ static inline void rcu_tasks_bootup_oddness(void) {} void show_rcu_tasks_gp_kthreads(void) {} diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index bd04b09b84b327..593df7edfe97f2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -177,7 +177,7 @@ module_param(rcu_unlock_delay, int, 0444); * 
per-CPU. Object size is equal to one page. This value * can be changed at boot time. */ -static int rcu_min_cached_objs = 2; +static int rcu_min_cached_objs = 5; module_param(rcu_min_cached_objs, int, 0444); /* Retrieve RCU kthreads priority for rcutorture */ @@ -928,8 +928,8 @@ void __rcu_irq_enter_check_tick(void) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - // Enabling the tick is unsafe in NMI handlers. - if (WARN_ON_ONCE(in_nmi())) + // If we're here from NMI there's nothing to do. + if (in_nmi()) return; RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(), @@ -1093,8 +1093,11 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp) * CPU can safely enter RCU read-side critical sections. In other words, * if the current CPU is not in its idle loop or is in an interrupt or * NMI handler, return true. + * + * Make notrace because it can be called by the internal functions of + * ftrace, and making this notrace removes unnecessary recursion calls. */ -bool rcu_is_watching(void) +notrace bool rcu_is_watching(void) { bool ret; @@ -3084,6 +3087,9 @@ struct kfree_rcu_cpu_work { * In order to save some per-cpu space the list is singular. * Even though it is lockless an access has to be protected by the * per-cpu lock. + * @page_cache_work: A work to refill the cache when it is empty + * @work_in_progress: Indicates that page_cache_work is running + * @hrtimer: A hrtimer for scheduling a page_cache_work * @nr_bkv_objs: number of allocated objects at @bkvcache. * * This is a per-CPU structure. 
The reason that it is not included in @@ -3100,6 +3106,11 @@ struct kfree_rcu_cpu { bool monitor_todo; bool initialized; int count; + + struct work_struct page_cache_work; + atomic_t work_in_progress; + struct hrtimer hrtimer; + struct llist_head bkvcache; int nr_bkv_objs; }; @@ -3217,10 +3228,10 @@ static void kfree_rcu_work(struct work_struct *work) } rcu_lock_release(&rcu_callback_map); - krcp = krc_this_cpu_lock(&flags); + raw_spin_lock_irqsave(&krcp->lock, flags); if (put_cached_bnode(krcp, bkvhead[i])) bkvhead[i] = NULL; - krc_this_cpu_unlock(krcp, flags); + raw_spin_unlock_irqrestore(&krcp->lock, flags); if (bkvhead[i]) free_page((unsigned long) bkvhead[i]); @@ -3347,6 +3358,57 @@ static void kfree_rcu_monitor(struct work_struct *work) raw_spin_unlock_irqrestore(&krcp->lock, flags); } +static enum hrtimer_restart +schedule_page_work_fn(struct hrtimer *t) +{ + struct kfree_rcu_cpu *krcp = + container_of(t, struct kfree_rcu_cpu, hrtimer); + + queue_work(system_highpri_wq, &krcp->page_cache_work); + return HRTIMER_NORESTART; +} + +static void fill_page_cache_func(struct work_struct *work) +{ + struct kvfree_rcu_bulk_data *bnode; + struct kfree_rcu_cpu *krcp = + container_of(work, struct kfree_rcu_cpu, + page_cache_work); + unsigned long flags; + bool pushed; + int i; + + for (i = 0; i < rcu_min_cached_objs; i++) { + bnode = (struct kvfree_rcu_bulk_data *) + __get_free_page(GFP_KERNEL | __GFP_NOWARN); + + if (bnode) { + raw_spin_lock_irqsave(&krcp->lock, flags); + pushed = put_cached_bnode(krcp, bnode); + raw_spin_unlock_irqrestore(&krcp->lock, flags); + + if (!pushed) { + free_page((unsigned long) bnode); + break; + } + } + } + + atomic_set(&krcp->work_in_progress, 0); +} + +static void +run_page_cache_worker(struct kfree_rcu_cpu *krcp) +{ + if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING && + !atomic_xchg(&krcp->work_in_progress, 1)) { + hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + krcp->hrtimer.function = schedule_page_work_fn; + 
hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL); + } +} + static inline bool kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr) { @@ -3363,32 +3425,8 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr) if (!krcp->bkvhead[idx] || krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) { bnode = get_cached_bnode(krcp); - if (!bnode) { - /* - * To keep this path working on raw non-preemptible - * sections, prevent the optional entry into the - * allocator as it uses sleeping locks. In fact, even - * if the caller of kfree_rcu() is preemptible, this - * path still is not, as krcp->lock is a raw spinlock. - * With additional page pre-allocation in the works, - * hitting this return is going to be much less likely. - */ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - return false; - - /* - * NOTE: For one argument of kvfree_rcu() we can - * drop the lock and get the page in sleepable - * context. That would allow to maintain an array - * for the CONFIG_PREEMPT_RT as well if no cached - * pages are available. - */ - bnode = (struct kvfree_rcu_bulk_data *) - __get_free_page(GFP_NOWAIT | __GFP_NOWARN); - } - /* Switch to emergency path. */ - if (unlikely(!bnode)) + if (!bnode) return false; /* Initialize the new block. */ @@ -3452,12 +3490,10 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) goto unlock_return; } - /* - * Under high memory pressure GFP_NOWAIT can fail, - * in that case the emergency path is maintained. - */ success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr); if (!success) { + run_page_cache_worker(krcp); + if (head == NULL) // Inline if kvfree_rcu(one_arg) call. 
goto unlock_return; @@ -4449,24 +4485,14 @@ static void __init kfree_rcu_batch_init(void) for_each_possible_cpu(cpu) { struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); - struct kvfree_rcu_bulk_data *bnode; for (i = 0; i < KFREE_N_BATCHES; i++) { INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work); krcp->krw_arr[i].krcp = krcp; } - for (i = 0; i < rcu_min_cached_objs; i++) { - bnode = (struct kvfree_rcu_bulk_data *) - __get_free_page(GFP_NOWAIT | __GFP_NOWARN); - - if (bnode) - put_cached_bnode(krcp, bnode); - else - pr_err("Failed to preallocate for %d CPU!\n", cpu); - } - INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor); + INIT_WORK(&krcp->page_cache_work, fill_page_cache_func); krcp->initialized = true; } if (register_shrinker(&kfree_rcu_shrinker)) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e7e453492cffc0..77aa0e788b9b76 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6100,12 +6100,8 @@ static void do_sched_yield(void) schedstat_inc(rq->yld_count); current->sched_class->yield_task(rq); - /* - * Since we are going to call schedule() anyway, there's - * no need to preempt or enable interrupts: - */ preempt_disable(); - rq_unlock(rq, &rf); + rq_unlock_irq(rq, &rf); sched_preempt_enable_no_resched(); schedule(); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1d3c97268ec0d9..8d06d1f4e2f7b8 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2547,7 +2547,7 @@ int sched_dl_global_validate(void) u64 period = global_rt_period(); u64 new_bw = to_ratio(period, runtime); struct dl_bw *dl_b; - int cpu, ret = 0; + int cpu, cpus, ret = 0; unsigned long flags; /* @@ -2562,9 +2562,10 @@ int sched_dl_global_validate(void) for_each_possible_cpu(cpu) { rcu_read_lock_sched(); dl_b = dl_bw_of(cpu); + cpus = dl_bw_cpus(cpu); raw_spin_lock_irqsave(&dl_b->lock, flags); - if (new_bw < dl_b->total_bw) + if (new_bw * cpus < dl_b->total_bw) ret = -EBUSY; raw_spin_unlock_irqrestore(&dl_b->lock, 
flags); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index df80bfcea92eba..c122176c627ec3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -257,30 +257,6 @@ struct rt_bandwidth { void __dl_clear_params(struct task_struct *p); -/* - * To keep the bandwidth of -deadline tasks and groups under control - * we need some place where: - * - store the maximum -deadline bandwidth of the system (the group); - * - cache the fraction of that bandwidth that is currently allocated. - * - * This is all done in the data structure below. It is similar to the - * one used for RT-throttling (rt_bandwidth), with the main difference - * that, since here we are only interested in admission control, we - * do not decrease any runtime while the group "executes", neither we - * need a timer to replenish it. - * - * With respect to SMP, the bandwidth is given on a per-CPU basis, - * meaning that: - * - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU; - * - dl_total_bw array contains, in the i-eth element, the currently - * allocated bandwidth on the i-eth CPU. - * Moreover, groups consume bandwidth on each CPU, while tasks only - * consume bandwidth on the CPU they're running on. - * Finally, dl_total_bw_cpu is used to cache the index of dl_total_bw - * that will be shown the next time the proc or cgroup controls will - * be red. It on its turn can be changed by writing on its own - * control. - */ struct dl_bandwidth { raw_spinlock_t dl_runtime_lock; u64 dl_runtime; @@ -292,6 +268,24 @@ static inline int dl_bandwidth_enabled(void) return sysctl_sched_rt_runtime >= 0; } +/* + * To keep the bandwidth of -deadline tasks under control + * we need some place where: + * - store the maximum -deadline bandwidth of each cpu; + * - cache the fraction of bandwidth that is currently allocated in + * each root domain; + * + * This is all done in the data structure below. 
It is similar to the + * one used for RT-throttling (rt_bandwidth), with the main difference + * that, since here we are only interested in admission control, we + * do not decrease any runtime while the group "executes", neither we + * need a timer to replenish it. + * + * With respect to SMP, bandwidth is given on a per root domain basis, + * meaning that: + * - bw (< 100%) is the deadline bandwidth of each CPU; + * - total_bw is the currently allocated bandwidth in each root domain; + */ struct dl_bw { raw_spinlock_t lock; u64 bw; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 2efe1e206167cc..f25208e8df8365 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -188,6 +188,7 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) kfree(td); return PTR_ERR(tsk); } + kthread_set_per_cpu(tsk, cpu); /* * Park the thread so that it could start right on the CPU * when it is available. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 81632cd5e3b729..e8d351b7f9b033 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -941,13 +941,6 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) */ if (tick_do_timer_cpu == cpu) return false; - /* - * Boot safety: make sure the timekeeping duty has been - * assigned before entering dyntick-idle mode, - * tick_do_timer_cpu is TICK_DO_TIMER_BOOT - */ - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT)) - return false; /* Should not happen for nohz-full */ if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e1bf5228fb692a..29db703f68806b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -531,7 +531,7 @@ config KPROBE_EVENTS config KPROBE_EVENTS_ON_NOTRACE bool "Do NOT protect notrace function from kprobe events" depends on KPROBE_EVENTS - depends on KPROBES_ON_FTRACE + depends on DYNAMIC_FTRACE default n help This is only for the developers who want to debug ftrace 
itself diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a125ea5e04cd75..0dde84b9d29feb 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2041,10 +2041,12 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name) void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) { - struct module *mod = __module_address((unsigned long)btp); + struct module *mod; - if (mod) - module_put(mod); + preempt_disable(); + mod = __module_address((unsigned long)btp); + module_put(mod); + preempt_enable(); } static __always_inline diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a6268e09160a5b..ddeb865706ba49 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -129,7 +129,16 @@ int ring_buffer_print_entry_header(struct trace_seq *s) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ -#define RB_ALIGN_DATA __aligned(RB_ALIGNMENT) + +#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS +# define RB_FORCE_8BYTE_ALIGNMENT 0 +# define RB_ARCH_ALIGNMENT RB_ALIGNMENT +#else +# define RB_FORCE_8BYTE_ALIGNMENT 1 +# define RB_ARCH_ALIGNMENT 8U +#endif + +#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT) /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ #define RINGBUF_TYPE_DATA 0 ... 
RINGBUF_TYPE_DATA_TYPE_LEN_MAX @@ -2719,7 +2728,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, event->time_delta = delta; length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA) { + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { event->type_len = 0; event->array[0] = length; } else @@ -2734,11 +2743,11 @@ static unsigned rb_calculate_event_length(unsigned length) if (!length) length++; - if (length > RB_MAX_SMALL_DATA) + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) length += sizeof(event.array[0]); length += RB_EVNT_HDR_SIZE; - length = ALIGN(length, RB_ALIGNMENT); + length = ALIGN(length, RB_ARCH_ALIGNMENT); /* * In case the time delta is larger than the 27 bits for it diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 06134189e9a720..3119d68d012dfb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -68,10 +68,21 @@ bool ring_buffer_expanded; static bool __read_mostly tracing_selftest_running; /* - * If a tracer is running, we do not want to run SELFTEST. + * If boot-time tracing including tracers/events via kernel cmdline + * is running, we do not want to run SELFTEST. */ bool __read_mostly tracing_selftest_disabled; +#ifdef CONFIG_FTRACE_STARTUP_TEST +void __init disable_tracing_selftest(const char *reason) +{ + if (!tracing_selftest_disabled) { + tracing_selftest_disabled = true; + pr_info("Ftrace startup test is disabled due to %s\n", reason); + } +} +#endif + /* Pipe tracepoints to printk */ struct trace_iterator *tracepoint_print_iter; int tracepoint_printk; @@ -2113,11 +2124,7 @@ int __init register_tracer(struct tracer *type) apply_trace_boot_options(); /* disable other selftests, since this will break it. 
*/ - tracing_selftest_disabled = true; -#ifdef CONFIG_FTRACE_STARTUP_TEST - printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n", - type->name); -#endif + disable_tracing_selftest("running a tracer"); out_unlock: return ret; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1dadef445cd1e8..6784b572ce5974 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -896,6 +896,8 @@ extern bool ring_buffer_expanded; extern bool tracing_selftest_disabled; #ifdef CONFIG_FTRACE_STARTUP_TEST +extern void __init disable_tracing_selftest(const char *reason); + extern int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_function_graph(struct tracer *trace, @@ -919,6 +921,9 @@ extern int trace_selftest_startup_branch(struct tracer *trace, */ #define __tracer_data __refdata #else +static inline void __init disable_tracing_selftest(const char *reason) +{ +} /* Tracers are seldom changed. Optimize when selftests are disabled. 
*/ #define __tracer_data __read_mostly #endif /* CONFIG_FTRACE_STARTUP_TEST */ diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index c22a152ef0b4ff..a82f03f385f896 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -344,6 +344,8 @@ static int __init trace_boot_init(void) trace_boot_init_one_instance(tr, trace_node); trace_boot_init_instances(trace_node); + disable_tracing_selftest("running boot-time tracing"); + return 0; } /* diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 47a71f96e5bcc7..802f3e7d8b8b5f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3201,7 +3201,7 @@ static __init int setup_trace_event(char *str) { strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE); ring_buffer_expanded = true; - tracing_selftest_disabled = true; + disable_tracing_selftest("running event tracing"); return 1; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b911e9f6d9f5c3..5fff39541b8ae7 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -25,11 +25,12 @@ /* Kprobe early definition from command line */ static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata; -static bool kprobe_boot_events_enabled __initdata; static int __init set_kprobe_boot_events(char *str) { strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE); + disable_tracing_selftest("running kprobe events"); + return 0; } __setup("kprobe_event=", set_kprobe_boot_events); @@ -433,7 +434,7 @@ static int disable_trace_kprobe(struct trace_event_call *call, return 0; } -#if defined(CONFIG_KPROBES_ON_FTRACE) && \ +#if defined(CONFIG_DYNAMIC_FTRACE) && \ !defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE) static bool __within_notrace_func(unsigned long addr) { @@ -1887,8 +1888,6 @@ static __init void setup_boot_kprobe_events(void) ret = trace_run_command(cmd, create_or_delete_trace_kprobe); if (ret) pr_warn("Failed to add event(%d): %s\n", ret, cmd); - else - 
kprobe_boot_events_enabled = true; cmd = p; } @@ -1973,10 +1972,8 @@ static __init int kprobe_trace_self_tests_init(void) if (tracing_is_disabled()) return -ENODEV; - if (kprobe_boot_events_enabled) { - pr_info("Skipping kprobe tests due to kprobe_event on cmdline\n"); + if (tracing_selftest_disabled) return 0; - } target = kprobe_trace_selftest_target; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 4738ad48a66740..6f28b8b11ead66 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -787,7 +787,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, /* Have we just recovered from a hang? */ if (graph_hang_thresh > GRAPH_MAX_FUNC_TEST) { - tracing_selftest_disabled = true; + disable_tracing_selftest("recovering from a hang"); ret = -1; goto out; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 437935e7a19911..1d99c52cc99a6c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1845,12 +1845,6 @@ static void worker_attach_to_pool(struct worker *worker, { mutex_lock(&wq_pool_attach_mutex); - /* - * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any - * online CPUs. It'll be re-applied when any of the CPUs come up. - */ - set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); - /* * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains * stable across this function. See the comments above the flag @@ -1859,6 +1853,9 @@ static void worker_attach_to_pool(struct worker *worker, if (pool->flags & POOL_DISASSOCIATED) worker->flags |= WORKER_UNBOUND; + if (worker->rescue_wq) + set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); + list_add_tail(&worker->node, &pool->workers); worker->pool = pool; @@ -3728,17 +3725,24 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) * is updated and visible. 
*/ if (!freezable || !workqueue_freezing) { + bool kick = false; + pwq->max_active = wq->saved_max_active; while (!list_empty(&pwq->delayed_works) && - pwq->nr_active < pwq->max_active) + pwq->nr_active < pwq->max_active) { pwq_activate_first_delayed(pwq); + kick = true; + } /* * Need to kick a worker after thawed or an unbound wq's - * max_active is bumped. It's a slow path. Do it always. + * max_active is bumped. In realtime scenarios, always kicking a + * worker will cause interference on the isolated cpu cores, so + * let's kick iff work items were activated. */ - wake_up_worker(pwq->pool); + if (kick) + wake_up_worker(pwq->pool); } else { pwq->max_active = 0; } diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index bd7b3aaa93c380..c70d6347afa2b4 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -561,9 +561,14 @@ static int ddebug_exec_queries(char *query, const char *modname) int dynamic_debug_exec_queries(const char *query, const char *modname) { int rc; - char *qry = kstrndup(query, PAGE_SIZE, GFP_KERNEL); + char *qry; /* writable copy of query */ - if (!query) + if (!query) { + pr_err("non-null query/command string expected\n"); + return -EINVAL; + } + qry = kstrndup(query, PAGE_SIZE, GFP_KERNEL); + if (!qry) return -ENOMEM; rc = ddebug_exec_queries(qry, modname); diff --git a/lib/genalloc.c b/lib/genalloc.c index 7f1244b5294a8a..dab97bb69df63e 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -81,14 +81,14 @@ static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) * users set the same bit, one user will return remain bits, otherwise * return 0. 
*/ -static int bitmap_set_ll(unsigned long *map, int start, int nr) +static int bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned long size = start + nr; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_set >= 0) { + while (nr >= bits_to_set) { if (set_bits_ll(p, mask_to_set)) return nr; nr -= bits_to_set; @@ -116,14 +116,15 @@ static int bitmap_set_ll(unsigned long *map, int start, int nr) * users clear the same bit, one user will return remain bits, * otherwise return 0. */ -static int bitmap_clear_ll(unsigned long *map, int start, int nr) +static unsigned long +bitmap_clear_ll(unsigned long *map, unsigned long start, unsigned long nr) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned long size = start + nr; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_clear >= 0) { + while (nr >= bits_to_clear) { if (clear_bits_ll(p, mask_to_clear)) return nr; nr -= bits_to_clear; @@ -183,8 +184,8 @@ int gen_pool_add_owner(struct gen_pool *pool, unsigned long virt, phys_addr_t ph size_t size, int nid, void *owner) { struct gen_pool_chunk *chunk; - int nbits = size >> pool->min_alloc_order; - int nbytes = sizeof(struct gen_pool_chunk) + + unsigned long nbits = size >> pool->min_alloc_order; + unsigned long nbytes = sizeof(struct gen_pool_chunk) + BITS_TO_LONGS(nbits) * sizeof(long); chunk = vzalloc_node(nbytes, nid); @@ -242,7 +243,7 @@ void gen_pool_destroy(struct gen_pool *pool) struct list_head *_chunk, *_next_chunk; struct gen_pool_chunk *chunk; int order = pool->min_alloc_order; - int bit, end_bit; + unsigned long bit, end_bit; list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { chunk = list_entry(_chunk, struct gen_pool_chunk, 
next_chunk); @@ -278,7 +279,7 @@ unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size, struct gen_pool_chunk *chunk; unsigned long addr = 0; int order = pool->min_alloc_order; - int nbits, start_bit, end_bit, remain; + unsigned long nbits, start_bit, end_bit, remain; #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG BUG_ON(in_nmi()); @@ -487,7 +488,7 @@ void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size, { struct gen_pool_chunk *chunk; int order = pool->min_alloc_order; - int start_bit, nbits, remain; + unsigned long start_bit, nbits, remain; #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG BUG_ON(in_nmi()); @@ -755,7 +756,7 @@ unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, index = bitmap_find_next_zero_area(map, size, start, nr, 0); while (index < size) { - int next_bit = find_next_bit(map, size, index + nr); + unsigned long next_bit = find_next_bit(map, size, index + nr); if ((next_bit - index) < len) { len = next_bit - index; start_bit = index; diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1635111c5bd2af..a21e6a5792c5a5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1658,7 +1658,7 @@ static int copy_compat_iovec_from_user(struct iovec *iov, (const struct compat_iovec __user *)uvec; int ret = -EFAULT, i; - if (!user_access_begin(uvec, nr_segs * sizeof(*uvec))) + if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; for (i = 0; i < nr_segs; i++) { diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index b4c0df6d706dcb..c770570bfe4f2d 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -48,7 +48,7 @@ endif endif quiet_cmd_unroll = UNROLL $@ - cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$* < $< > $@ + cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@ targets += int1.c int2.c int4.c int8.c int16.c int32.c $(obj)/int%.c: $(src)/int.uc $(src)/unroll.awk FORCE diff --git a/lib/zlib_dfltcc/Makefile b/lib/zlib_dfltcc/Makefile index 
8e4d5afbbb1094..66e1c96387c407 100644 --- a/lib/zlib_dfltcc/Makefile +++ b/lib/zlib_dfltcc/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_ZLIB_DFLTCC) += zlib_dfltcc.o -zlib_dfltcc-objs := dfltcc.o dfltcc_deflate.o dfltcc_inflate.o dfltcc_syms.o +zlib_dfltcc-objs := dfltcc.o dfltcc_deflate.o dfltcc_inflate.o diff --git a/lib/zlib_dfltcc/dfltcc.c b/lib/zlib_dfltcc/dfltcc.c index c30de430b30ca6..782f76e9d4dab1 100644 --- a/lib/zlib_dfltcc/dfltcc.c +++ b/lib/zlib_dfltcc/dfltcc.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: Zlib /* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. */ -#include +#include +#include #include "dfltcc_util.h" #include "dfltcc.h" @@ -53,3 +54,6 @@ void dfltcc_reset( dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; dfltcc_state->param.ribm = DFLTCC_RIBM; } +EXPORT_SYMBOL(dfltcc_reset); + +MODULE_LICENSE("GPL"); diff --git a/lib/zlib_dfltcc/dfltcc_deflate.c b/lib/zlib_dfltcc/dfltcc_deflate.c index 00c185101c6d14..6c946e8532eec6 100644 --- a/lib/zlib_dfltcc/dfltcc_deflate.c +++ b/lib/zlib_dfltcc/dfltcc_deflate.c @@ -4,6 +4,7 @@ #include "dfltcc_util.h" #include "dfltcc.h" #include +#include #include /* @@ -34,6 +35,7 @@ int dfltcc_can_deflate( return 1; } +EXPORT_SYMBOL(dfltcc_can_deflate); static void dfltcc_gdht( z_streamp strm @@ -277,3 +279,4 @@ int dfltcc_deflate( goto again; /* deflate() must use all input or all output */ return 1; } +EXPORT_SYMBOL(dfltcc_deflate); diff --git a/lib/zlib_dfltcc/dfltcc_inflate.c b/lib/zlib_dfltcc/dfltcc_inflate.c index db107016d29b32..fb60b5a6a1cb67 100644 --- a/lib/zlib_dfltcc/dfltcc_inflate.c +++ b/lib/zlib_dfltcc/dfltcc_inflate.c @@ -125,7 +125,7 @@ dfltcc_inflate_action dfltcc_inflate( param->ho = (state->write - state->whave) & ((1 << HB_BITS) - 1); if (param->hl) param->nt = 0; /* Honor history for the first block */ - param->cv = state->flags ? 
REVERSE(state->check) : state->check; + param->cv = state->check; /* Inflate */ do { @@ -138,7 +138,7 @@ dfltcc_inflate_action dfltcc_inflate( state->bits = param->sbb; state->whave = param->hl; state->write = (param->ho + param->hl) & ((1 << HB_BITS) - 1); - state->check = state->flags ? REVERSE(param->cv) : param->cv; + state->check = param->cv; if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { /* Report an error if stream is corrupted */ state->mode = BAD; diff --git a/lib/zlib_dfltcc/dfltcc_syms.c b/lib/zlib_dfltcc/dfltcc_syms.c deleted file mode 100644 index 6f23481804c1d8..00000000000000 --- a/lib/zlib_dfltcc/dfltcc_syms.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/lib/zlib_dfltcc/dfltcc_syms.c - * - * Exported symbols for the s390 zlib dfltcc support. - * - */ - -#include -#include -#include -#include "dfltcc.h" - -EXPORT_SYMBOL(dfltcc_can_deflate); -EXPORT_SYMBOL(dfltcc_deflate); -EXPORT_SYMBOL(dfltcc_reset); -MODULE_LICENSE("GPL"); diff --git a/mm/gup.c b/mm/gup.c index 98eb8e6d2609c3..054ff923d3d92e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -123,6 +123,28 @@ static __maybe_unused struct page *try_grab_compound_head(struct page *page, return NULL; } +static void put_compound_head(struct page *page, int refs, unsigned int flags) +{ + if (flags & FOLL_PIN) { + mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, + refs); + + if (hpage_pincount_available(page)) + hpage_pincount_sub(page, refs); + else + refs *= GUP_PIN_COUNTING_BIAS; + } + + VM_BUG_ON_PAGE(page_ref_count(page) < refs, page); + /* + * Calling put_page() for each ref is unnecessarily slow. Only the last + * ref needs a put_page(). 
+ */ + if (refs > 1) + page_ref_sub(page, refs - 1); + put_page(page); +} + /** * try_grab_page() - elevate a page's refcount by a flag-dependent amount * @@ -177,41 +199,6 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) return true; } -#ifdef CONFIG_DEV_PAGEMAP_OPS -static bool __unpin_devmap_managed_user_page(struct page *page) -{ - int count, refs = 1; - - if (!page_is_devmap_managed(page)) - return false; - - if (hpage_pincount_available(page)) - hpage_pincount_sub(page, 1); - else - refs = GUP_PIN_COUNTING_BIAS; - - count = page_ref_sub_return(page, refs); - - mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, 1); - /* - * devmap page refcounts are 1-based, rather than 0-based: if - * refcount is 1, then the page is free and the refcount is - * stable because nobody holds a reference on the page. - */ - if (count == 1) - free_devmap_managed_page(page); - else if (!count) - __put_page(page); - - return true; -} -#else -static bool __unpin_devmap_managed_user_page(struct page *page) -{ - return false; -} -#endif /* CONFIG_DEV_PAGEMAP_OPS */ - /** * unpin_user_page() - release a dma-pinned page * @page: pointer to page to be released @@ -223,28 +210,7 @@ static bool __unpin_devmap_managed_user_page(struct page *page) */ void unpin_user_page(struct page *page) { - int refs = 1; - - page = compound_head(page); - - /* - * For devmap managed pages we need to catch refcount transition from - * GUP_PIN_COUNTING_BIAS to 1, when refcount reach one it means the - * page is free and we need to inform the device driver through - * callback. See include/linux/memremap.h and HMM for details. 
- */ - if (__unpin_devmap_managed_user_page(page)) - return; - - if (hpage_pincount_available(page)) - hpage_pincount_sub(page, 1); - else - refs = GUP_PIN_COUNTING_BIAS; - - if (page_ref_sub_and_test(page, refs)) - __put_page(page); - - mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, 1); + put_compound_head(compound_head(page), 1, FOLL_PIN); } EXPORT_SYMBOL(unpin_user_page); @@ -2062,29 +2028,6 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * This code is based heavily on the PowerPC implementation by Nick Piggin. */ #ifdef CONFIG_HAVE_FAST_GUP - -static void put_compound_head(struct page *page, int refs, unsigned int flags) -{ - if (flags & FOLL_PIN) { - mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, - refs); - - if (hpage_pincount_available(page)) - hpage_pincount_sub(page, refs); - else - refs *= GUP_PIN_COUNTING_BIAS; - } - - VM_BUG_ON_PAGE(page_ref_count(page) < refs, page); - /* - * Calling put_page() for each ref is unnecessarily slow. Only the last - * ref needs a put_page(). - */ - if (refs > 1) - page_ref_sub(page, refs - 1); - put_page(page); -} - #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH /* @@ -2677,13 +2620,61 @@ static int __gup_longterm_unlocked(unsigned long start, int nr_pages, return ret; } -static int internal_get_user_pages_fast(unsigned long start, int nr_pages, +static unsigned long lockless_pages_from_mm(unsigned long start, + unsigned long end, + unsigned int gup_flags, + struct page **pages) +{ + unsigned long flags; + int nr_pinned = 0; + unsigned seq; + + if (!IS_ENABLED(CONFIG_HAVE_FAST_GUP) || + !gup_fast_permitted(start, end)) + return 0; + + if (gup_flags & FOLL_PIN) { + seq = raw_read_seqcount(¤t->mm->write_protect_seq); + if (seq & 1) + return 0; + } + + /* + * Disable interrupts. The nested form is used, in order to allow full, + * general purpose use of this routine. + * + * With interrupts disabled, we block page table pages from being freed + * from under us. 
See struct mmu_table_batch comments in + * include/asm-generic/tlb.h for more details. + * + * We do not adopt an rcu_read_lock() here as we also want to block IPIs + * that come from THPs splitting. + */ + local_irq_save(flags); + gup_pgd_range(start, end, gup_flags, pages, &nr_pinned); + local_irq_restore(flags); + + /* + * When pinning pages for DMA there could be a concurrent write protect + * from fork() via copy_page_range(), in this case always fail fast GUP. + */ + if (gup_flags & FOLL_PIN) { + if (read_seqcount_retry(¤t->mm->write_protect_seq, seq)) { + unpin_user_pages(pages, nr_pinned); + return 0; + } + } + return nr_pinned; +} + +static int internal_get_user_pages_fast(unsigned long start, + unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { - unsigned long addr, len, end; - unsigned long flags; - int nr_pinned = 0, ret = 0; + unsigned long len, end; + unsigned long nr_pinned; + int ret; if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | FOLL_FORCE | FOLL_PIN | FOLL_GET | @@ -2697,54 +2688,33 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages, might_lock_read(¤t->mm->mmap_lock); start = untagged_addr(start) & PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - if (end <= start) + len = nr_pages << PAGE_SHIFT; + if (check_add_overflow(start, len, &end)) return 0; if (unlikely(!access_ok((void __user *)start, len))) return -EFAULT; - /* - * Disable interrupts. The nested form is used, in order to allow - * full, general purpose use of this routine. - * - * With interrupts disabled, we block page table pages from being - * freed from under us. See struct mmu_table_batch comments in - * include/asm-generic/tlb.h for more details. - * - * We do not adopt an rcu_read_lock(.) here as we also want to - * block IPIs that come from THPs splitting. 
- */ - if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) && gup_fast_permitted(start, end)) { - unsigned long fast_flags = gup_flags; - - local_irq_save(flags); - gup_pgd_range(addr, end, fast_flags, pages, &nr_pinned); - local_irq_restore(flags); - ret = nr_pinned; - } - - if (nr_pinned < nr_pages && !(gup_flags & FOLL_FAST_ONLY)) { - /* Try to get the remaining pages with get_user_pages */ - start += nr_pinned << PAGE_SHIFT; - pages += nr_pinned; - - ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, - gup_flags, pages); + nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages); + if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY) + return nr_pinned; - /* Have to be a bit careful with return values */ - if (nr_pinned > 0) { - if (ret < 0) - ret = nr_pinned; - else - ret += nr_pinned; - } + /* Slow path: try to get the remaining pages with get_user_pages */ + start += nr_pinned << PAGE_SHIFT; + pages += nr_pinned; + ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags, + pages); + if (ret < 0) { + /* + * The caller has to unpin the pages we already pinned so + * returning -errno is not an option + */ + if (nr_pinned) + return nr_pinned; + return ret; } - - return ret; + return ret + nr_pinned; } + /** * get_user_pages_fast_only() - pin user pages in memory * @start: starting user address diff --git a/mm/huge_memory.c b/mm/huge_memory.c index ec2bb93f743143..85eda66eb625db 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2321,7 +2321,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, static void unmap_page(struct page *page) { - enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; bool unmap_success; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d029d938d26d66..9a3f06cdcc2a8d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4106,10 +4106,30 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, * 
may get SIGKILLed if it later faults. */ if (outside_reserve) { + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx; + u32 hash; + put_page(old_page); BUG_ON(huge_pte_none(pte)); + /* + * Drop hugetlb_fault_mutex and i_mmap_rwsem before + * unmapping. unmapping needs to hold i_mmap_rwsem + * in write mode. Dropping i_mmap_rwsem in read mode + * here is OK as COW mappings do not interact with + * PMD sharing. + * + * Reacquire both after unmap operation. + */ + idx = vma_hugecache_offset(h, vma, haddr); + hash = hugetlb_fault_mutex_hash(mapping, idx); + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + i_mmap_unlock_read(mapping); + unmap_ref_private(mm, vma, old_page, haddr); - BUG_ON(huge_pte_none(pte)); + + i_mmap_lock_read(mapping); + mutex_lock(&hugetlb_fault_mutex_table[hash]); spin_lock(ptl); ptep = huge_pte_offset(mm, haddr, huge_page_size(h)); if (likely(ptep && @@ -4352,7 +4372,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * So we need to block hugepage fault by PG_hwpoison bit check. 
*/ if (unlikely(PageHWPoison(page))) { - ret = VM_FAULT_HWPOISON | + ret = VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(hstate_index(h)); goto backout_unlocked; } @@ -5115,6 +5135,7 @@ int hugetlb_reserve_pages(struct inode *inode, if (unlikely(add < 0)) { hugetlb_acct_memory(h, -gbl_reserve); + ret = add; goto out_put_pages; } else if (unlikely(chg > add)) { /* diff --git a/mm/init-mm.c b/mm/init-mm.c index 3a613c85f9ede2..153162669f8062 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -31,6 +31,7 @@ struct mm_struct init_mm = { .pgd = swapper_pg_dir, .mm_users = ATOMIC_INIT(2), .mm_count = ATOMIC_INIT(1), + .write_protect_seq = SEQCNT_ZERO(init_mm.write_protect_seq), MMAP_LOCK_INITIALIZER(init_mm) .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), diff --git a/mm/kasan/init.c b/mm/kasan/init.c index fe6be0be1f763c..b8c6ec172bb22a 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -377,9 +377,10 @@ static void kasan_remove_pmd_table(pmd_t *pmd, unsigned long addr, if (kasan_pte_table(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && - IS_ALIGNED(next, PMD_SIZE)) + IS_ALIGNED(next, PMD_SIZE)) { pmd_clear(pmd); - continue; + continue; + } } pte = pte_offset_kernel(pmd, addr); kasan_remove_pte_table(pte, addr, next); @@ -402,9 +403,10 @@ static void kasan_remove_pud_table(pud_t *pud, unsigned long addr, if (kasan_pmd_table(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && - IS_ALIGNED(next, PUD_SIZE)) + IS_ALIGNED(next, PUD_SIZE)) { pud_clear(pud); - continue; + continue; + } } pmd = pmd_offset(pud, addr); pmd_base = pmd_offset(pud, 0); @@ -428,9 +430,10 @@ static void kasan_remove_p4d_table(p4d_t *p4d, unsigned long addr, if (kasan_pud_table(*p4d)) { if (IS_ALIGNED(addr, P4D_SIZE) && - IS_ALIGNED(next, P4D_SIZE)) + IS_ALIGNED(next, P4D_SIZE)) { p4d_clear(p4d); - continue; + continue; + } } pud = pud_offset(p4d, addr); kasan_remove_pud_table(pud, addr, next); @@ -462,9 +465,10 @@ void 
kasan_remove_zero_shadow(void *start, unsigned long size) if (kasan_p4d_table(*pgd)) { if (IS_ALIGNED(addr, PGDIR_SIZE) && - IS_ALIGNED(next, PGDIR_SIZE)) + IS_ALIGNED(next, PGDIR_SIZE)) { pgd_clear(pgd); - continue; + continue; + } } p4d = p4d_offset(pgd, addr); @@ -488,7 +492,6 @@ int kasan_add_zero_shadow(void *start, unsigned long size) ret = kasan_populate_early_shadow(shadow_start, shadow_end); if (ret) - kasan_remove_zero_shadow(shadow_start, - size >> KASAN_SHADOW_SCALE_SHIFT); + kasan_remove_zero_shadow(start, size); return ret; } diff --git a/mm/madvise.c b/mm/madvise.c index 13f5677b93222a..9abf4c5f2bce2c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -908,14 +908,7 @@ static int madvise_inject_error(int behavior, } else { pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n", pfn, start); - /* - * Drop the page reference taken by get_user_pages_fast(). In - * the absence of MF_COUNT_INCREASED the memory_failure() - * routine is responsible for pinning the page to prevent it - * from being released back to the page allocator. 
- */ - put_page(page); - ret = memory_failure(pfn, 0); + ret = memory_failure(pfn, MF_COUNT_INCREASED); } if (ret) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 29459a6ce1c7ad..8fc23d53f55009 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2987,6 +2987,7 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void) objcg = rcu_dereference(memcg->objcg); if (objcg && obj_cgroup_tryget(objcg)) break; + objcg = NULL; } rcu_read_unlock(); @@ -3082,9 +3083,7 @@ void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages) if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) page_counter_uncharge(&memcg->kmem, nr_pages); - page_counter_uncharge(&memcg->memory, nr_pages); - if (do_memsw_account()) - page_counter_uncharge(&memcg->memsw, nr_pages); + refill_stock(memcg, nr_pages); } /** @@ -3246,8 +3245,10 @@ int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) * independently later. */ rcu_read_lock(); +retry: memcg = obj_cgroup_memcg(objcg); - css_get(&memcg->css); + if (unlikely(!css_tryget(&memcg->css))) + goto retry; rcu_read_unlock(); nr_pages = size >> PAGE_SHIFT; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5d880d4eb9a267..fd653c9953cfda 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -989,7 +989,7 @@ static int get_hwpoison_page(struct page *page) static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, int flags, struct page **hpagep) { - enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; + enum ttu_flags ttu = TTU_IGNORE_MLOCK; struct address_space *mapping; LIST_HEAD(tokill); bool unmap_success = true; @@ -1231,6 +1231,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, loff_t start; dax_entry_t cookie; + if (flags & MF_COUNT_INCREASED) + /* + * Drop the extra refcount in case we come from madvise(). 
+ */ + put_page(page); + /* * Prevent the inode from being freed while we are interrogating * the address_space, typically this would be handled by diff --git a/mm/memory.c b/mm/memory.c index c48f8df6e50268..50632c4366b8ab 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1171,6 +1171,15 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, src_vma, src_mm, addr, end); mmu_notifier_invalidate_range_start(&range); + /* + * Disabling preemption is not needed for the write side, as + * the read side doesn't spin, but goes to the mmap_lock. + * + * Use the raw variant of the seqcount_t write API to avoid + * lockdep complaining about preemptibility. + */ + mmap_assert_write_locked(src_mm); + raw_write_seqcount_begin(&src_mm->write_protect_seq); } ret = 0; @@ -1187,8 +1196,10 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) } } while (dst_pgd++, src_pgd++, addr = next, addr != end); - if (is_cow) + if (is_cow) { + raw_write_seqcount_end(&src_mm->write_protect_seq); mmu_notifier_invalidate_range_end(&range); + } return ret; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 63b2e46b65552b..aa453a43314372 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -714,7 +714,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, * expects the zone spans the pfn range. 
All the pages in the range * are reserved so nobody should be touching them so we should be safe */ - memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, + memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0, MEMINIT_HOTPLUG, altmap, migratetype); set_zone_contiguous(zone); @@ -1304,7 +1304,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (WARN_ON(PageLRU(page))) isolate_lru_page(page); if (page_mapped(page)) - try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS); + try_to_unmap(page, TTU_IGNORE_MLOCK); continue; } diff --git a/mm/migrate.c b/mm/migrate.c index 5795cb82e27c35..9d7ca1bd7f4b3c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -406,6 +406,7 @@ int migrate_page_move_mapping(struct address_space *mapping, struct zone *oldzone, *newzone; int dirty; int expected_count = expected_page_refs(mapping, page) + extra_count; + int nr = thp_nr_pages(page); if (!mapping) { /* Anonymous page without mapping */ @@ -441,7 +442,7 @@ int migrate_page_move_mapping(struct address_space *mapping, */ newpage->index = page->index; newpage->mapping = page->mapping; - page_ref_add(newpage, thp_nr_pages(page)); /* add cache reference */ + page_ref_add(newpage, nr); /* add cache reference */ if (PageSwapBacked(page)) { __SetPageSwapBacked(newpage); if (PageSwapCache(page)) { @@ -463,7 +464,7 @@ int migrate_page_move_mapping(struct address_space *mapping, if (PageTransHuge(page)) { int i; - for (i = 1; i < HPAGE_PMD_NR; i++) { + for (i = 1; i < nr; i++) { xas_next(&xas); xas_store(&xas, newpage); } @@ -474,7 +475,7 @@ int migrate_page_move_mapping(struct address_space *mapping, * to one less reference. * We know this isn't the last reference. 
*/ - page_ref_unfreeze(page, expected_count - thp_nr_pages(page)); + page_ref_unfreeze(page, expected_count - nr); xas_unlock(&xas); /* Leave irq disabled to prevent preemption while updating stats */ @@ -497,17 +498,17 @@ int migrate_page_move_mapping(struct address_space *mapping, old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat); new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat); - __dec_lruvec_state(old_lruvec, NR_FILE_PAGES); - __inc_lruvec_state(new_lruvec, NR_FILE_PAGES); + __mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr); + __mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr); if (PageSwapBacked(page) && !PageSwapCache(page)) { - __dec_lruvec_state(old_lruvec, NR_SHMEM); - __inc_lruvec_state(new_lruvec, NR_SHMEM); + __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr); + __mod_lruvec_state(new_lruvec, NR_SHMEM, nr); } if (dirty && mapping_can_writeback(mapping)) { - __dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY); - __dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING); - __inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY); - __inc_zone_state(newzone, NR_ZONE_WRITE_PENDING); + __mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr); + __mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr); + __mod_lruvec_state(new_lruvec, NR_FILE_DIRTY, nr); + __mod_zone_page_state(newzone, NR_ZONE_WRITE_PENDING, nr); } } local_irq_enable(); @@ -1122,8 +1123,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, /* Establish migration ptes */ VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma, page); - try_to_unmap(page, - TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); + try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK); page_was_mapped = 1; } @@ -1329,8 +1329,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, if (page_mapped(hpage)) { bool mapping_locked = false; - enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK| - TTU_IGNORE_ACCESS; + enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK; if (!PageAnon(hpage)) { 
/* @@ -2688,7 +2687,7 @@ static void migrate_vma_prepare(struct migrate_vma *migrate) */ static void migrate_vma_unmap(struct migrate_vma *migrate) { - int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; + int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK; const unsigned long npages = migrate->npages; const unsigned long start = migrate->start; unsigned long addr, i, restore = 0; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 586042472ac901..eb34d204d4ee71 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2826,7 +2826,7 @@ EXPORT_SYMBOL(__test_set_page_writeback); */ void wait_on_page_writeback(struct page *page) { - if (PageWriteback(page)) { + while (PageWriteback(page)) { trace_wait_on_page_writeback(page, page_mapping(page)); wait_on_page_bit(page, PG_writeback); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index eaa227a479e4a4..88639706ae1774 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -448,6 +448,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) if (end_pfn < pgdat_end_pfn(NODE_DATA(nid))) return false; + if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX) + return true; /* * We start only with one section of pages, more pages are added as * needed until the rest of deferred pages are initialized. @@ -2470,12 +2472,12 @@ static bool can_steal_fallback(unsigned int order, int start_mt) return false; } -static inline void boost_watermark(struct zone *zone) +static inline bool boost_watermark(struct zone *zone) { unsigned long max_boost; if (!watermark_boost_factor) - return; + return false; /* * Don't bother in zones that are unlikely to produce results. * On small machines, including kdump capture kernels running @@ -2483,7 +2485,7 @@ static inline void boost_watermark(struct zone *zone) * memory situation immediately. 
*/ if ((pageblock_nr_pages * 4) > zone_managed_pages(zone)) - return; + return false; max_boost = mult_frac(zone->_watermark[WMARK_HIGH], watermark_boost_factor, 10000); @@ -2497,12 +2499,14 @@ static inline void boost_watermark(struct zone *zone) * boosted watermark resulting in a hang. */ if (!max_boost) - return; + return false; max_boost = max(pageblock_nr_pages, max_boost); zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, max_boost); + + return true; } /* @@ -2540,8 +2544,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, * likelihood of future fallbacks. Wake kswapd now as the node * may be balanced overall and kswapd will not wake naturally. */ - boost_watermark(zone); - if (alloc_flags & ALLOC_KSWAPD) + if (boost_watermark(zone) && (alloc_flags & ALLOC_KSWAPD)) set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); /* We are not allowed to try stealing from the whole block */ @@ -2843,20 +2846,20 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, { struct page *page; -#ifdef CONFIG_CMA - /* - * Balance movable allocations between regular and CMA areas by - * allocating from CMA when over half of the zone's free memory - * is in the CMA area. - */ - if (alloc_flags & ALLOC_CMA && - zone_page_state(zone, NR_FREE_CMA_PAGES) > - zone_page_state(zone, NR_FREE_PAGES) / 2) { - page = __rmqueue_cma_fallback(zone, order); - if (page) - return page; + if (IS_ENABLED(CONFIG_CMA)) { + /* + * Balance movable allocations between regular and CMA areas by + * allocating from CMA when over half of the zone's free memory + * is in the CMA area. 
+ */ + if (alloc_flags & ALLOC_CMA && + zone_page_state(zone, NR_FREE_CMA_PAGES) > + zone_page_state(zone, NR_FREE_PAGES) / 2) { + page = __rmqueue_cma_fallback(zone, order); + if (page) + goto out; + } } -#endif retry: page = __rmqueue_smallest(zone, order, migratetype); if (unlikely(!page)) { @@ -2867,8 +2870,9 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, alloc_flags)) goto retry; } - - trace_mm_page_alloc_zone_locked(page, order, migratetype); +out: + if (page) + trace_mm_page_alloc_zone_locked(page, order, migratetype); return page; } @@ -6049,7 +6053,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) * zone stats (e.g., nr_isolate_pageblock) are touched. */ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn, + unsigned long start_pfn, unsigned long zone_end_pfn, enum meminit_context context, struct vmem_altmap *altmap, int migratetype) { @@ -6085,7 +6089,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, if (context == MEMINIT_EARLY) { if (overlap_memmap_init(zone, &pfn)) continue; - if (defer_init(nid, pfn, end_pfn)) + if (defer_init(nid, pfn, zone_end_pfn)) break; } @@ -6199,7 +6203,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid, if (end_pfn > start_pfn) { size = end_pfn - start_pfn; - memmap_init_zone(size, nid, zone, start_pfn, + memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); } } diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 702250f148e73a..c90d722c618171 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/rmap.c b/mm/rmap.c index 31b29321adfe1e..6657000b18d41b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1533,15 +1533,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, goto discard; } - if (!(flags & 
TTU_IGNORE_ACCESS)) { - if (ptep_clear_flush_young_notify(vma, address, - pvmw.pte)) { - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; - } - } - /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); if (should_defer_flush(mm, flags)) { diff --git a/mm/slub.c b/mm/slub.c index 34dcc09e2ec9b0..071e41067ea672 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1971,7 +1971,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, t = acquire_slab(s, n, page, object == NULL, &objects); if (!t) - break; + continue; /* cmpxchg raced */ available += objects; if (!object) { @@ -5620,10 +5620,8 @@ static int sysfs_slab_add(struct kmem_cache *s) s->kobj.kset = kset; err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name); - if (err) { - kobject_put(&s->kobj); + if (err) goto out; - } err = sysfs_create_group(&s->kobj, &slab_attr_group); if (err) diff --git a/mm/swapfile.c b/mm/swapfile.c index d58361109066d6..16db9d1ebcbf39 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1045,16 +1045,18 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size) /* Only single cluster request supported */ WARN_ON_ONCE(n_goal > 1 && size == SWAPFILE_CLUSTER); + spin_lock(&swap_avail_lock); + avail_pgs = atomic_long_read(&nr_swap_pages) / size; - if (avail_pgs <= 0) + if (avail_pgs <= 0) { + spin_unlock(&swap_avail_lock); goto noswap; + } n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs); atomic_long_sub(n_goal * size, &nr_swap_pages); - spin_lock(&swap_avail_lock); - start_over: node = numa_node_id(); plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { @@ -1128,14 +1130,13 @@ swp_entry_t get_swap_page_of_type(int type) spin_lock(&si->lock); if (si->flags & SWP_WRITEOK) { - atomic_long_dec(&nr_swap_pages); /* This is called for allocating swap entry, not cache */ offset = scan_swap_map(si, 1); if (offset) { + atomic_long_dec(&nr_swap_pages); spin_unlock(&si->lock); 
return swp_entry(type, offset); } - atomic_long_inc(&nr_swap_pages); } spin_unlock(&si->lock); fail: diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6ae491a8b210f3..fff03a331314fb 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2256,7 +2256,7 @@ static void __vunmap(const void *addr, int deallocate_pages) debug_check_no_locks_freed(area->addr, get_vm_area_size(area)); debug_check_no_obj_freed(area->addr, get_vm_area_size(area)); - kasan_poison_vmalloc(area->addr, area->size); + kasan_poison_vmalloc(area->addr, get_vm_area_size(area)); vm_remove_mappings(area, deallocate_pages); @@ -2405,8 +2405,10 @@ void *vmap(struct page **pages, unsigned int count, return NULL; } - if (flags & VM_MAP_PUT_PAGES) + if (flags & VM_MAP_PUT_PAGES) { area->pages = pages; + area->nr_pages = count; + } return area->addr; } EXPORT_SYMBOL(vmap); @@ -3448,11 +3450,11 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) } static void s_stop(struct seq_file *m, void *p) - __releases(&vmap_purge_lock) __releases(&vmap_area_lock) + __releases(&vmap_purge_lock) { - mutex_unlock(&vmap_purge_lock); spin_unlock(&vmap_area_lock); + mutex_unlock(&vmap_purge_lock); } static void show_numa_info(struct seq_file *m, struct vm_struct *v) diff --git a/mm/vmscan.c b/mm/vmscan.c index 7b4e31eac2cff1..4c5a9b2286bf5b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1072,7 +1072,6 @@ static void page_check_dirty_writeback(struct page *page, static unsigned int shrink_page_list(struct list_head *page_list, struct pglist_data *pgdat, struct scan_control *sc, - enum ttu_flags ttu_flags, struct reclaim_stat *stat, bool ignore_references) { @@ -1241,6 +1240,8 @@ static unsigned int shrink_page_list(struct list_head *page_list, if (!PageSwapCache(page)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; + if (page_maybe_dma_pinned(page)) + goto keep_locked; if (PageTransHuge(page)) { /* cannot split THP, skip it */ if (!can_split_huge_page(page, NULL)) @@ -1297,7 +1298,7 @@ static unsigned int 
shrink_page_list(struct list_head *page_list, * processes. Try to unmap it here. */ if (page_mapped(page)) { - enum ttu_flags flags = ttu_flags | TTU_BATCH_FLUSH; + enum ttu_flags flags = TTU_BATCH_FLUSH; bool was_swapbacked = PageSwapBacked(page); if (unlikely(PageTransHuge(page))) @@ -1514,7 +1515,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, } nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, - TTU_IGNORE_ACCESS, &stat, true); + &stat, true); list_splice(&clean_pages, page_list); mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -(long)nr_reclaimed); @@ -1958,8 +1959,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (nr_taken == 0) return 0; - nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, 0, - &stat, false); + nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, &stat, false); spin_lock_irq(&pgdat->lru_lock); @@ -2131,8 +2131,7 @@ unsigned long reclaim_pages(struct list_head *page_list) nr_reclaimed += shrink_page_list(&node_page_list, NODE_DATA(nid), - &sc, 0, - &dummy_stat, false); + &sc, &dummy_stat, false); while (!list_empty(&node_page_list)) { page = lru_to_page(&node_page_list); list_del(&page->lru); @@ -2145,8 +2144,7 @@ unsigned long reclaim_pages(struct list_head *page_list) if (!list_empty(&node_page_list)) { nr_reclaimed += shrink_page_list(&node_page_list, NODE_DATA(nid), - &sc, 0, - &dummy_stat, false); + &sc, &dummy_stat, false); while (!list_empty(&node_page_list)) { page = lru_to_page(&node_page_list); list_del(&page->lru); diff --git a/mm/z3fold.c b/mm/z3fold.c index 18feaa0bc53773..0152ad9931a87a 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -90,7 +90,7 @@ struct z3fold_buddy_slots { * be enough slots to hold all possible variants */ unsigned long slot[BUDDY_MASK + 1]; - unsigned long pool; /* back link + flags */ + unsigned long pool; /* back link */ rwlock_t lock; }; #define HANDLE_FLAG_MASK (0x03) @@ -185,7 +185,7 @@ enum z3fold_page_flags { * handle flags, 
go under HANDLE_FLAG_MASK */ enum z3fold_handle_flags { - HANDLES_ORPHANED = 0, + HANDLES_NOFREE = 0, }; /* @@ -303,10 +303,9 @@ static inline void put_z3fold_header(struct z3fold_header *zhdr) z3fold_page_unlock(zhdr); } -static inline void free_handle(unsigned long handle) +static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr) { struct z3fold_buddy_slots *slots; - struct z3fold_header *zhdr; int i; bool is_free; @@ -316,22 +315,19 @@ static inline void free_handle(unsigned long handle) if (WARN_ON(*(unsigned long *)handle == 0)) return; - zhdr = handle_to_z3fold_header(handle); slots = handle_to_slots(handle); write_lock(&slots->lock); *(unsigned long *)handle = 0; - if (zhdr->slots == slots) { + + if (test_bit(HANDLES_NOFREE, &slots->pool)) { write_unlock(&slots->lock); return; /* simple case, nothing else to do */ } - /* we are freeing a foreign handle if we are here */ - zhdr->foreign_handles--; + if (zhdr->slots != slots) + zhdr->foreign_handles--; + is_free = true; - if (!test_bit(HANDLES_ORPHANED, &slots->pool)) { - write_unlock(&slots->lock); - return; - } for (i = 0; i <= BUDDY_MASK; i++) { if (slots->slot[i]) { is_free = false; @@ -343,6 +339,8 @@ static inline void free_handle(unsigned long handle) if (is_free) { struct z3fold_pool *pool = slots_to_pool(slots); + if (zhdr->slots == slots) + zhdr->slots = NULL; kmem_cache_free(pool->c_handle, slots); } } @@ -525,8 +523,6 @@ static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) { struct page *page = virt_to_page(zhdr); struct z3fold_pool *pool = zhdr_to_pool(zhdr); - bool is_free = true; - int i; WARN_ON(!list_empty(&zhdr->buddy)); set_bit(PAGE_STALE, &page->private); @@ -536,21 +532,6 @@ static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) list_del_init(&page->lru); spin_unlock(&pool->lock); - /* If there are no foreign handles, free the handles array */ - read_lock(&zhdr->slots->lock); - for (i = 0; i <= BUDDY_MASK; i++) { - if 
(zhdr->slots->slot[i]) { - is_free = false; - break; - } - } - if (!is_free) - set_bit(HANDLES_ORPHANED, &zhdr->slots->pool); - read_unlock(&zhdr->slots->lock); - - if (is_free) - kmem_cache_free(pool->c_handle, zhdr->slots); - if (locked) z3fold_page_unlock(zhdr); @@ -653,6 +634,28 @@ static inline void add_to_unbuddied(struct z3fold_pool *pool, } } +static inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks) +{ + enum buddy bud = HEADLESS; + + if (zhdr->middle_chunks) { + if (!zhdr->first_chunks && + chunks <= zhdr->start_middle - ZHDR_CHUNKS) + bud = FIRST; + else if (!zhdr->last_chunks) + bud = LAST; + } else { + if (!zhdr->first_chunks) + bud = FIRST; + else if (!zhdr->last_chunks) + bud = LAST; + else + bud = MIDDLE; + } + + return bud; +} + static inline void *mchunk_memmove(struct z3fold_header *zhdr, unsigned short dst_chunk) { @@ -714,18 +717,7 @@ static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr) if (WARN_ON(new_zhdr == zhdr)) goto out_fail; - if (new_zhdr->first_chunks == 0) { - if (new_zhdr->middle_chunks != 0 && - chunks >= new_zhdr->start_middle) { - new_bud = LAST; - } else { - new_bud = FIRST; - } - } else if (new_zhdr->last_chunks == 0) { - new_bud = LAST; - } else if (new_zhdr->middle_chunks == 0) { - new_bud = MIDDLE; - } + new_bud = get_free_buddy(new_zhdr, chunks); q = new_zhdr; switch (new_bud) { case FIRST: @@ -847,9 +839,8 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked) return; } - if (unlikely(PageIsolated(page) || - test_bit(PAGE_CLAIMED, &page->private) || - test_bit(PAGE_STALE, &page->private))) { + if (test_bit(PAGE_STALE, &page->private) || + test_and_set_bit(PAGE_CLAIMED, &page->private)) { z3fold_page_unlock(zhdr); return; } @@ -858,13 +849,16 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked) zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) { if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) atomic64_dec(&pool->pages_nr); - 
else + else { + clear_bit(PAGE_CLAIMED, &page->private); z3fold_page_unlock(zhdr); + } return; } z3fold_compact_page(zhdr); add_to_unbuddied(pool, zhdr); + clear_bit(PAGE_CLAIMED, &page->private); z3fold_page_unlock(zhdr); } @@ -973,6 +967,9 @@ static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool, } } + if (zhdr && !zhdr->slots) + zhdr->slots = alloc_slots(pool, + can_sleep ? GFP_NOIO : GFP_ATOMIC); return zhdr; } @@ -1109,17 +1106,8 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, retry: zhdr = __z3fold_alloc(pool, size, can_sleep); if (zhdr) { - if (zhdr->first_chunks == 0) { - if (zhdr->middle_chunks != 0 && - chunks >= zhdr->start_middle) - bud = LAST; - else - bud = FIRST; - } else if (zhdr->last_chunks == 0) - bud = LAST; - else if (zhdr->middle_chunks == 0) - bud = MIDDLE; - else { + bud = get_free_buddy(zhdr, chunks); + if (bud == HEADLESS) { if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) atomic64_dec(&pool->pages_nr); @@ -1265,12 +1253,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) pr_err("%s: unknown bud %d\n", __func__, bud); WARN_ON(1); put_z3fold_header(zhdr); - clear_bit(PAGE_CLAIMED, &page->private); return; } if (!page_claimed) - free_handle(handle); + free_handle(handle, zhdr); if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) { atomic64_dec(&pool->pages_nr); return; @@ -1280,8 +1267,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) z3fold_page_unlock(zhdr); return; } - if (unlikely(PageIsolated(page)) || - test_and_set_bit(NEEDS_COMPACTING, &page->private)) { + if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { put_z3fold_header(zhdr); clear_bit(PAGE_CLAIMED, &page->private); return; @@ -1345,6 +1331,10 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) struct page *page = NULL; struct list_head *pos; unsigned long first_handle = 0, middle_handle = 0, last_handle = 0; + struct 
z3fold_buddy_slots slots __attribute__((aligned(SLOTS_ALIGN))); + + rwlock_init(&slots.lock); + slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE); spin_lock(&pool->lock); if (!pool->ops || !pool->ops->evict || retries == 0) { @@ -1359,35 +1349,36 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) list_for_each_prev(pos, &pool->lru) { page = list_entry(pos, struct page, lru); - /* this bit could have been set by free, in which case - * we pass over to the next page in the pool. - */ - if (test_and_set_bit(PAGE_CLAIMED, &page->private)) { - page = NULL; - continue; - } - - if (unlikely(PageIsolated(page))) { - clear_bit(PAGE_CLAIMED, &page->private); - page = NULL; - continue; - } zhdr = page_address(page); if (test_bit(PAGE_HEADLESS, &page->private)) break; + if (kref_get_unless_zero(&zhdr->refcount) == 0) { + zhdr = NULL; + break; + } if (!z3fold_page_trylock(zhdr)) { - clear_bit(PAGE_CLAIMED, &page->private); + if (kref_put(&zhdr->refcount, + release_z3fold_page)) + atomic64_dec(&pool->pages_nr); zhdr = NULL; continue; /* can't evict at this point */ } - if (zhdr->foreign_handles) { - clear_bit(PAGE_CLAIMED, &page->private); - z3fold_page_unlock(zhdr); + + /* test_and_set_bit is of course atomic, but we still + * need to do it under page lock, otherwise checking + * that bit in __z3fold_alloc wouldn't make sense + */ + if (zhdr->foreign_handles || + test_and_set_bit(PAGE_CLAIMED, &page->private)) { + if (kref_put(&zhdr->refcount, + release_z3fold_page)) + atomic64_dec(&pool->pages_nr); + else + z3fold_page_unlock(zhdr); zhdr = NULL; continue; /* can't evict such page */ } - kref_get(&zhdr->refcount); list_del_init(&zhdr->buddy); zhdr->cpu = -1; break; @@ -1409,12 +1400,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) first_handle = 0; last_handle = 0; middle_handle = 0; + memset(slots.slot, 0, sizeof(slots.slot)); if (zhdr->first_chunks) - first_handle = encode_handle(zhdr, FIRST); + 
first_handle = __encode_handle(zhdr, &slots, + FIRST); if (zhdr->middle_chunks) - middle_handle = encode_handle(zhdr, MIDDLE); + middle_handle = __encode_handle(zhdr, &slots, + MIDDLE); if (zhdr->last_chunks) - last_handle = encode_handle(zhdr, LAST); + last_handle = __encode_handle(zhdr, &slots, + LAST); /* * it's safe to unlock here because we hold a * reference to this page @@ -1429,19 +1424,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) ret = pool->ops->evict(pool, middle_handle); if (ret) goto next; - free_handle(middle_handle); } if (first_handle) { ret = pool->ops->evict(pool, first_handle); if (ret) goto next; - free_handle(first_handle); } if (last_handle) { ret = pool->ops->evict(pool, last_handle); if (ret) goto next; - free_handle(last_handle); } next: if (test_bit(PAGE_HEADLESS, &page->private)) { @@ -1455,9 +1447,11 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) spin_unlock(&pool->lock); clear_bit(PAGE_CLAIMED, &page->private); } else { + struct z3fold_buddy_slots *slots = zhdr->slots; z3fold_page_lock(zhdr); if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { + kmem_cache_free(pool->c_handle, slots); atomic64_dec(&pool->pages_nr); return 0; } @@ -1573,8 +1567,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(PageIsolated(page), page); - if (test_bit(PAGE_HEADLESS, &page->private) || - test_bit(PAGE_CLAIMED, &page->private)) + if (test_bit(PAGE_HEADLESS, &page->private)) return false; zhdr = page_address(page); @@ -1586,6 +1579,8 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) goto out; + if (test_and_set_bit(PAGE_CLAIMED, &page->private)) + goto out; pool = zhdr_to_pool(zhdr); spin_lock(&pool->lock); if (!list_empty(&zhdr->buddy)) @@ -1612,16 +1607,17 @@ static int z3fold_page_migrate(struct 
address_space *mapping, struct page *newpa VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); + VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page); VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); zhdr = page_address(page); pool = zhdr_to_pool(zhdr); - if (!z3fold_page_trylock(zhdr)) { + if (!z3fold_page_trylock(zhdr)) return -EAGAIN; - } if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) { z3fold_page_unlock(zhdr); + clear_bit(PAGE_CLAIMED, &page->private); return -EBUSY; } if (work_pending(&zhdr->work)) { @@ -1663,6 +1659,7 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); page_mapcount_reset(page); + clear_bit(PAGE_CLAIMED, &page->private); put_page(page); return 0; } @@ -1686,6 +1683,7 @@ static void z3fold_page_putback(struct page *page) spin_lock(&pool->lock); list_add(&page->lru, &pool->lru); spin_unlock(&pool->lock); + clear_bit(PAGE_CLAIMED, &page->private); z3fold_page_unlock(zhdr); } diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index f292e0267bb9ea..15bbfaf943fd12 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -284,7 +284,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) return 0; out_free_newdev: - if (new_dev->reg_state == NETREG_UNINITIALIZED) + if (new_dev->reg_state == NETREG_UNINITIALIZED || + new_dev->reg_state == NETREG_UNREGISTERED) free_netdev(new_dev); return err; } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 502552d6e9aff3..c4aa2cbb926974 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -763,7 +763,7 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_LE_CLEAR_RESOLV_LIST, 0, NULL); } - if (hdev->commands[35] & 0x40) { + if (hdev->commands[35] & 0x04) { __le16 rpa_timeout = cpu_to_le16(hdev->rpa_timeout); /* Set RPA timeout */ diff --git 
a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f04963914366e0..17a72695865b55 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4941,6 +4941,11 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, return; } + if (!hcon->amp_mgr) { + hci_dev_unlock(hdev); + return; + } + if (ev->status) { hci_conn_del(hcon); hci_dev_unlock(hdev); @@ -5868,21 +5873,19 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) { u8 num_reports = skb->data[0]; - void *ptr = &skb->data[1]; + struct hci_ev_le_direct_adv_info *ev = (void *)&skb->data[1]; - hci_dev_lock(hdev); + if (!num_reports || skb->len < num_reports * sizeof(*ev) + 1) + return; - while (num_reports--) { - struct hci_ev_le_direct_adv_info *ev = ptr; + hci_dev_lock(hdev); + for (; num_reports; num_reports--, ev++) process_adv_report(hdev, ev->evt_type, &ev->bdaddr, ev->bdaddr_type, &ev->direct_addr, ev->direct_addr_type, ev->rssi, NULL, 0, false); - ptr += sizeof(*ev); - } - hci_dev_unlock(hdev); } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 6f12bab4d2fa6b..610ed0817bd77e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -698,7 +698,8 @@ static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr, cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp); - if (use_ll_privacy(req->hdev)) { + if (use_ll_privacy(req->hdev) && + hci_dev_test_flag(req->hdev, HCI_ENABLE_LL_PRIVACY)) { struct smp_irk *irk; irk = hci_find_irk_by_addr(req->hdev, bdaddr, bdaddr_type); @@ -732,7 +733,8 @@ static int add_to_white_list(struct hci_request *req, return -1; /* White list can not be used with RPAs */ - if (!allow_rpa && !use_ll_privacy(hdev) && + if (!allow_rpa && + !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) { return -1; } @@ -750,7 +752,8 @@ static int add_to_white_list(struct hci_request *req, 
cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); - if (use_ll_privacy(hdev)) { + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) { struct smp_irk *irk; irk = hci_find_irk_by_addr(hdev, ¶ms->addr, @@ -812,7 +815,8 @@ static u8 update_white_list(struct hci_request *req) } /* White list can not be used with RPAs */ - if (!allow_rpa && !use_ll_privacy(hdev) && + if (!allow_rpa && + !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { return 0x00; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 79ffcdef0b7ad5..22a110f37abc6b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1003,6 +1003,11 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, case BT_SNDMTU: case BT_RCVMTU: + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + break; + } + if (put_user(sco_pi(sk)->conn->mtu, (u32 __user *)optval)) err = -EFAULT; break; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index c1c30a9f76f343..8b796c499cbb24 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -272,7 +272,8 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, kattr->test.repeat) return -EINVAL; - if (ctx_size_in < prog->aux->max_ctx_offset) + if (ctx_size_in < prog->aux->max_ctx_offset || + ctx_size_in > MAX_BPF_FUNC_ARGS * sizeof(u64)) return -EINVAL; if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0) diff --git a/net/can/isotp.c b/net/can/isotp.c index 26bdc3c20b7e48..8bd565f2073e72 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1139,6 +1139,7 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer) if (peer) return -EOPNOTSUPP; + memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = so->ifindex; addr->can_addr.tp.rx_id = so->rxid; diff --git a/net/core/dev.c b/net/core/dev.c index 38412e70f76185..81e5d482c238e3 100644 --- a/net/core/dev.c +++ 
b/net/core/dev.c @@ -9602,6 +9602,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } + if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) { + netdev_dbg(dev, "Dropping TLS RX HW offload feature since no RXCSUM feature.\n"); + features &= ~NETIF_F_HW_TLS_RX; + } + return features; } diff --git a/net/core/devlink.c b/net/core/devlink.c index 8c5ddffd707def..5d397838bceb60 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4134,7 +4134,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink_param_item *param_item; struct sk_buff *msg; int err; @@ -4163,7 +4163,7 @@ static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; return __devlink_nl_cmd_param_set_doit(devlink_port->devlink, devlink_port->index, diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 80dbf2f4016e26..8e582e29a41e39 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -80,11 +80,11 @@ static void est_timer(struct timer_list *t) u64 rate, brate; est_fetch_counters(est, &b); - brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log); - brate -= (est->avbps >> est->ewma_log); + brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log); + brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log); - rate = (b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log); - rate -= (est->avpps >> est->ewma_log); + rate = (b.packets - est->last_packets) << (10 - est->intvl_log); + rate = (rate >> est->ewma_log) - (est->avpps >> 
est->ewma_log); write_seqcount_begin(&est->seq); est->avbps += brate; @@ -143,6 +143,9 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, if (parm->interval < -2 || parm->interval > 3) return -EINVAL; + if (parm->ewma_log == 0 || parm->ewma_log >= 31) + return -EINVAL; + est = kzalloc(sizeof(*est), GFP_KERNEL); if (!est) return -ENOBUFS; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 94fff0700bdd34..b4562f9d074cf2 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1317,8 +1317,8 @@ static const struct attribute_group dql_group = { static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) { + int cpu, len, ret, num_tc = 1, tc = 0; struct net_device *dev = queue->dev; - int cpu, len, num_tc = 1, tc = 0; struct xps_dev_maps *dev_maps; cpumask_var_t mask; unsigned long index; @@ -1328,22 +1328,31 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, index = get_netdev_queue_index(queue); + if (!rtnl_trylock()) + return restart_syscall(); + if (dev->num_tc) { /* Do not allow XPS on subordinate device directly */ num_tc = dev->num_tc; - if (num_tc < 0) - return -EINVAL; + if (num_tc < 0) { + ret = -EINVAL; + goto err_rtnl_unlock; + } /* If queue belongs to subordinate dev use its map */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; tc = netdev_txq_to_tc(dev, index); - if (tc < 0) - return -EINVAL; + if (tc < 0) { + ret = -EINVAL; + goto err_rtnl_unlock; + } } - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_rtnl_unlock; + } rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_cpus_map); @@ -1366,9 +1375,15 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, } rcu_read_unlock(); + rtnl_unlock(); + len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); free_cpumask_var(mask); return len < PAGE_SIZE ? 
len : -EINVAL; + +err_rtnl_unlock: + rtnl_unlock(); + return ret; } static ssize_t xps_cpus_store(struct netdev_queue *queue, @@ -1396,7 +1411,13 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, return err; } + if (!rtnl_trylock()) { + free_cpumask_var(mask); + return restart_syscall(); + } + err = netif_set_xps_queue(dev, mask, index); + rtnl_unlock(); free_cpumask_var(mask); @@ -1408,22 +1429,29 @@ static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) { + int j, len, ret, num_tc = 1, tc = 0; struct net_device *dev = queue->dev; struct xps_dev_maps *dev_maps; unsigned long *mask, index; - int j, len, num_tc = 1, tc = 0; index = get_netdev_queue_index(queue); + if (!rtnl_trylock()) + return restart_syscall(); + if (dev->num_tc) { num_tc = dev->num_tc; tc = netdev_txq_to_tc(dev, index); - if (tc < 0) - return -EINVAL; + if (tc < 0) { + ret = -EINVAL; + goto err_rtnl_unlock; + } } mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL); - if (!mask) - return -ENOMEM; + if (!mask) { + ret = -ENOMEM; + goto err_rtnl_unlock; + } rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_rxqs_map); @@ -1449,10 +1477,16 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) out_no_maps: rcu_read_unlock(); + rtnl_unlock(); + len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues); bitmap_free(mask); return len < PAGE_SIZE ? len : -EINVAL; + +err_rtnl_unlock: + rtnl_unlock(); + return ret; } static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, @@ -1478,10 +1512,17 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, return err; } + if (!rtnl_trylock()) { + bitmap_free(mask); + return restart_syscall(); + } + cpus_read_lock(); err = __netif_set_xps_queue(dev, mask, index, true); cpus_read_unlock(); + rtnl_unlock(); + bitmap_free(mask); return err ? 
: len; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e578544b2cc711..7ab56796bd3a9c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -432,7 +432,11 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, len += NET_SKB_PAD; - if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || + /* If requested length is either too small or too big, + * we use kmalloc() for skb->head allocation. + */ + if (len <= SKB_WITH_OVERHEAD(1024) || + len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) @@ -496,13 +500,17 @@ EXPORT_SYMBOL(__netdev_alloc_skb); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct napi_alloc_cache *nc; struct sk_buff *skb; void *data; len += NET_SKB_PAD + NET_IP_ALIGN; - if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || + /* If requested length is either too small or too big, + * we use kmalloc() for skb->head allocation. + */ + if (len <= SKB_WITH_OVERHEAD(1024) || + len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) @@ -510,6 +518,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, goto skb_success; } + nc = this_cpu_ptr(&napi_alloc_cache); len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); len = SKB_DATA_ALIGN(len); @@ -2011,6 +2020,12 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) skb->csum = csum_block_sub(skb->csum, skb_checksum(skb, len, delta, 0), len); + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + int hdlen = (len > skb_headlen(skb)) ? 
skb_headlen(skb) : len; + int offset = skb_checksum_start_offset(skb) + skb->csum_offset; + + if (offset + sizeof(__sum16) > hdlen) + return -EINVAL; } return __pskb_trim(skb, len); } @@ -3642,7 +3657,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int delta_truesize = 0; unsigned int delta_len = 0; struct sk_buff *tail = NULL; - struct sk_buff *nskb; + struct sk_buff *nskb, *tmp; + int err; skb_push(skb, -skb_network_offset(skb) + offset); @@ -3652,11 +3668,28 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, nskb = list_skb; list_skb = list_skb->next; + err = 0; + if (skb_shared(nskb)) { + tmp = skb_clone(nskb, GFP_ATOMIC); + if (tmp) { + consume_skb(nskb); + nskb = tmp; + err = skb_unclone(nskb, GFP_ATOMIC); + } else { + err = -ENOMEM; + } + } + if (!tail) skb->next = nskb; else tail->next = nskb; + if (unlikely(err)) { + nskb->next = list_skb; + goto err_linearize; + } + tail = nskb; delta_len += nskb->len; diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index bbdd3c7b6cb5b9..b065f0a103ed06 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -293,7 +293,7 @@ struct sock *reuseport_select_sock(struct sock *sk, i = j = reciprocal_scale(hash, socks); while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { i++; - if (i >= reuse->num_socks) + if (i >= socks) i = 0; if (i == j) goto out; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 16014ad1940667..a352ce4f878a37 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1765,6 +1765,8 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, fn = &reply_funcs[dcb->cmd]; if (!fn->cb) return -EOPNOTSUPP; + if (fn->type == RTM_SETDCB && !netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; if (!tb[DCB_ATTR_IFNAME]) return -EINVAL; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 183003e45762ad..a47e0f9b20d0a9 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -353,9 +353,13 @@ static int dsa_port_devlink_setup(struct dsa_port *dp) static 
void dsa_port_teardown(struct dsa_port *dp) { + struct devlink_port *dlp = &dp->devlink_port; + if (!dp->setup) return; + devlink_port_type_clear(dlp); + switch (dp->type) { case DSA_PORT_TYPE_UNUSED: break; diff --git a/net/dsa/master.c b/net/dsa/master.c index c91de041a91d83..3a44da35dfeba1 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -308,8 +308,18 @@ static struct lock_class_key dsa_master_addr_list_lock_key; int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { + struct dsa_switch *ds = cpu_dp->ds; + struct device_link *consumer_link; int ret; + /* The DSA master must use SET_NETDEV_DEV for this to work. */ + consumer_link = device_link_add(ds->dev, dev->dev.parent, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!consumer_link) + netdev_err(dev, + "Failed to create a device link to DSA switch %s\n", + dev_name(ds->dev)); + rtnl_lock(); ret = dev_set_mtu(dev, ETH_DATA_LEN + cpu_dp->tag_ops->overhead); rtnl_unlock(); diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 5635604cb9ba1e..25a9e566ef5cdd 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -194,8 +194,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) if (netif_is_rxfh_configured(dev) && !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && (channels.combined_count + channels.rx_count) <= max_rx_in_use) { + ret = -EINVAL; GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing indirection table settings"); - return -EINVAL; + goto out_ops; } /* Disabling channels, query zero-copy AF_XDP sockets */ @@ -203,8 +204,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) min(channels.rx_count, channels.tx_count); for (i = from_channel; i < old_total; i++) if (xsk_get_pool_from_qid(dev, i)) { + ret = -EINVAL; GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets"); - return -EINVAL; + goto out_ops; } ret = dev->ethtool_ops->set_channels(dev, &channels); diff 
--git a/net/ethtool/strset.c b/net/ethtool/strset.c index 0baad0ce183282..c3a5489964cdeb 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -182,7 +182,7 @@ static int strset_parse_request(struct ethnl_req_info *req_base, ret = strset_get_id(attr, &id, extack); if (ret < 0) return ret; - if (ret >= ETH_SS_COUNT) { + if (id >= ETH_SS_COUNT) { NL_SET_ERR_MSG_ATTR(extack, attr, "unknown string set id"); return -EOPNOTSUPP; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 8b07f3a4f2db25..a3271ec3e1627f 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -443,7 +443,6 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb, int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; - u8 *vaddr; int nfrags; int esph_offset; struct page *page; @@ -485,14 +484,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * page = pfrag->page; get_page(page); - vaddr = kmap_atomic(page); - - tail = vaddr + pfrag->offset; + tail = page_address(page) + pfrag->offset; esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); - kunmap_atomic(vaddr); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cdf6ec5aa45de3..84bb707bd88d84 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -292,7 +292,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, - .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, .flowi4_scope = scope, .flowi4_mark = vmark ? 
skb->mark : 0, }; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 66fdbfe5447cdb..5d1e6fe9d8387b 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -128,7 +128,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, * to 0 and sets the configured key in the * inner erspan header field */ - if (greh->protocol == htons(ETH_P_ERSPAN) || + if ((greh->protocol == htons(ETH_P_ERSPAN) && hdr_len != 4) || greh->protocol == htons(ETH_P_ERSPAN2)) { struct erspan_base_hdr *ershdr; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f60869acbef029..48d2b615edc261 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -851,6 +851,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; + newicsk->icsk_probes_tstamp = 0; /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 879b76ae4435ca..97975bed491add 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -302,7 +302,7 @@ static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff * if (skb_is_gso(skb)) return ip_finish_output_gso(net, sk, skb, mtu); - if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU)) + if (skb->len > mtu || IPCB(skb)->frag_max_size) return ip_fragment(net, sk, skb, mtu, ip_finish_output2); return ip_finish_output2(net, sk, skb); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ee65c9225178d6..64594aa755f054 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -759,8 +759,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph, - 0, 0, false)) { + df = tnl_params->frag_off; + if (skb->protocol == htons(ETH_P_IP) 
&& !tunnel->ignore_df) + df |= (inner_iph->frag_off & htons(IP_DF)); + + if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) { ip_rt_put(rt); goto tx_error; } @@ -788,10 +791,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - df = tnl_params->frag_off; - if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df) - df |= (inner_iph->frag_off&htons(IP_DF)); - max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); if (max_headroom > dev->needed_headroom) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 561f15b5a944ec..3cd13e1bc6a700 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1441,7 +1441,7 @@ static int __init ip_auto_config(void) int retries = CONF_OPEN_RETRIES; #endif int err; - unsigned int i; + unsigned int i, count; /* Initialise all name servers and NTP servers to NONE (but only if the * "ip=" or "nfsaddrs=" kernel command line parameters weren't decoded, @@ -1575,7 +1575,7 @@ static int __init ip_auto_config(void) if (ic_dev_mtu) pr_cont(", mtu=%d", ic_dev_mtu); /* Name servers (if any): */ - for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { + for (i = 0, count = 0; i < CONF_NAMESERVERS_MAX; i++) { if (ic_nameservers[i] != NONE) { if (i == 0) pr_info(" nameserver%u=%pI4", @@ -1583,12 +1583,14 @@ static int __init ip_auto_config(void) else pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + + count++; } - if (i + 1 == CONF_NAMESERVERS_MAX) + if ((i + 1 == CONF_NAMESERVERS_MAX) && count > 0) pr_cont("\n"); } /* NTP servers (if any): */ - for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) { + for (i = 0, count = 0; i < CONF_NTP_SERVERS_MAX; i++) { if (ic_ntp_servers[i] != NONE) { if (i == 0) pr_info(" ntpserver%u=%pI4", @@ -1596,8 +1598,10 @@ static int __init ip_auto_config(void) else pr_cont(", ntpserver%u=%pI4", i, &ic_ntp_servers[i]); + + count++; } - if (i + 1 == CONF_NTP_SERVERS_MAX) + if ((i + 1 == 
CONF_NTP_SERVERS_MAX) && count > 0) pr_cont("\n"); } #endif /* !SILENT */ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 563b62b76a5f18..c576a63d09db1b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); struct xt_table_info info; ret = compat_table_info(private, &info); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6e2851f8d3a3fa..e8f6f9d8623763 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index cc23f1ce239c28..8cd3224d913e0c 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -76,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? 
skb->mark : 0; - flow.flowi4_tos = RT_TOS(iph->tos); + flow.flowi4_tos = iph->tos & IPTOS_RT_MASK; flow.flowi4_scope = RT_SCOPE_UNIVERSE; flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par)); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 0dc43ad28eb95a..f63f7ada51b367 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -496,7 +496,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) { if (!tb[i]) continue; - if (tb[NHA_FDB]) + if (i == NHA_FDB) continue; NL_SET_ERR_MSG(extack, "No other attributes can be set in nexthop groups"); @@ -1277,8 +1277,10 @@ static struct nexthop *nexthop_create_group(struct net *net, return nh; out_no_nh: - for (; i >= 0; --i) + for (i--; i >= 0; --i) { + list_del(&nhg->nh_entries[i].nh_list); nexthop_put(nhg->nh_entries[i].nh); + } kfree(nhg->spare); kfree(nhg); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b2bc3d7fe9e806..41d03683b13d6b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2685,6 +2685,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_backoff = 0; icsk->icsk_probes_out = 0; + icsk->icsk_probes_tstamp = 0; icsk->icsk_rto = TCP_TIMEOUT_INIT; icsk->icsk_rto_min = TCP_RTO_MIN; icsk->icsk_delack_max = TCP_DELACK_MAX; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ef4bdb038a4bbb..fac5c1469ceee2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2845,7 +2845,8 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag) } else if (tcp_is_rack(sk)) { u32 prior_retrans = tp->retrans_out; - tcp_rack_mark_lost(sk); + if (tcp_rack_mark_lost(sk)) + *ack_flag &= ~FLAG_SET_XMIT_TIMER; if (prior_retrans > tp->retrans_out) *ack_flag |= FLAG_LOST_RETRANS; } @@ -3370,6 +3371,7 @@ static void tcp_ack_probe(struct sock *sk) return; if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) { icsk->icsk_backoff = 0; + icsk->icsk_probes_tstamp = 0; inet_csk_clear_xmit_timer(sk, 
ICSK_TIME_PROBE0); /* Socket must be waked up by subsequent tcp_data_snd_check(). * This function is not for random using! @@ -3377,8 +3379,8 @@ static void tcp_ack_probe(struct sock *sk) } else { unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - when, TCP_RTO_MAX); + when = tcp_clamp_probe0_to_user_timeout(sk, when); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); } } @@ -3801,9 +3803,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); - /* If needed, reset TLP/RTO timer; RACK may later override this. */ - if (flag & FLAG_SET_XMIT_TIMER) - tcp_set_xmit_timer(sk); if (tcp_ack_is_dubious(sk, flag)) { if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) { @@ -3816,6 +3815,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) &rexmit); } + /* If needed, reset TLP/RTO timer when RACK doesn't set. */ + if (flag & FLAG_SET_XMIT_TIMER) + tcp_set_xmit_timer(sk); + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) sk_dst_confirm(sk); @@ -4379,10 +4382,9 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) * The receiver remembers and reflects via DSACKs. Leverage the * DSACK state and change the txhash to re-route speculatively. 
*/ - if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) { - sk_rethink_txhash(sk); + if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq && + sk_rethink_txhash(sk)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); - } } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 595dcc3afac5c1..ab8ed0fc476976 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1590,6 +1590,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, tcp_move_syn(newtp, req); ireq->ireq_opt = NULL; } else { + newinet->inet_opt = NULL; + if (!req_unhash && found_dup_sk) { /* This code path should only be executed in the * syncookie case only @@ -1597,8 +1599,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, bh_unlock_sock(newsk); sock_put(newsk); newsk = NULL; - } else { - newinet->inet_opt = NULL; } } return newsk; @@ -1755,6 +1755,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf); + u32 tail_gso_size, tail_gso_segs; struct skb_shared_info *shinfo; const struct tcphdr *th; struct tcphdr *thtail; @@ -1762,6 +1763,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) unsigned int hdrlen; bool fragstolen; u32 gso_segs; + u32 gso_size; int delta; /* In case all data was pulled from skb frags (in __pskb_pull_tail()), @@ -1787,13 +1789,6 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) */ th = (const struct tcphdr *)skb->data; hdrlen = th->doff * 4; - shinfo = skb_shinfo(skb); - - if (!shinfo->gso_size) - shinfo->gso_size = skb->len - hdrlen; - - if (!shinfo->gso_segs) - shinfo->gso_segs = 1; tail = sk->sk_backlog.tail; if (!tail) @@ -1816,6 +1811,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) goto no_coalesce; __skb_pull(skb, hdrlen); + + 
shinfo = skb_shinfo(skb); + gso_size = shinfo->gso_size ?: skb->len; + gso_segs = shinfo->gso_segs ?: 1; + + shinfo = skb_shinfo(tail); + tail_gso_size = shinfo->gso_size ?: (tail->len - hdrlen); + tail_gso_segs = shinfo->gso_segs ?: 1; + if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) { TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq; @@ -1842,11 +1846,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) } /* Not as strict as GRO. We only need to carry mss max value */ - skb_shinfo(tail)->gso_size = max(shinfo->gso_size, - skb_shinfo(tail)->gso_size); - - gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs; - skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF); + shinfo->gso_size = max(gso_size, tail_gso_size); + shinfo->gso_segs = min_t(u32, gso_segs + tail_gso_segs, 0xFFFF); sk->sk_backlog.len += delta; __NET_INC_STATS(sock_net(sk), diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 99011768c26402..f99494637ff47a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4080,6 +4080,7 @@ void tcp_send_probe0(struct sock *sk) /* Cancel probe timer, if it is not required. 
*/ icsk->icsk_probes_out = 0; icsk->icsk_backoff = 0; + icsk->icsk_probes_tstamp = 0; return; } @@ -4094,6 +4095,8 @@ void tcp_send_probe0(struct sock *sk) */ timeout = TCP_RESOURCE_PROBE_INTERVAL; } + + timeout = tcp_clamp_probe0_to_user_timeout(sk, timeout); tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX); } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index f65a3ddd0d58ac..31fc178f42c026 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -96,13 +96,13 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) } } -void tcp_rack_mark_lost(struct sock *sk) +bool tcp_rack_mark_lost(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 timeout; if (!tp->rack.advanced) - return; + return false; /* Reset the advanced flag to avoid unnecessary queue scanning */ tp->rack.advanced = 0; @@ -112,6 +112,7 @@ void tcp_rack_mark_lost(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, timeout, inet_csk(sk)->icsk_rto); } + return !!timeout; } /* Record the most recently (re)sent time among the (s)acked packets diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6c62b9ea1320d9..4ef08079ccfa9d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -40,6 +40,24 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining)); } +u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + u32 remaining; + s32 elapsed; + + if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp) + return when; + + elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp; + if (unlikely(elapsed < 0)) + elapsed = 0; + remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed; + remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN); + + return min_t(u32, remaining, when); +} + /** * tcp_write_err() - close socket and save error info * @sk: The socket the error has 
appeared on. @@ -219,14 +237,8 @@ static int tcp_write_timeout(struct sock *sk) int retry_until; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { - if (icsk->icsk_retransmits) { - dst_negative_advice(sk); - } else { - sk_rethink_txhash(sk); - tp->timeout_rehash++; - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPTIMEOUTREHASH); - } + if (icsk->icsk_retransmits) + __dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; expired = icsk->icsk_retransmits >= retry_until; } else { @@ -234,12 +246,7 @@ static int tcp_write_timeout(struct sock *sk) /* Black hole detection */ tcp_mtu_probing(icsk, sk); - dst_negative_advice(sk); - } else { - sk_rethink_txhash(sk); - tp->timeout_rehash++; - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPTIMEOUTREHASH); + __dst_negative_advice(sk); } retry_until = net->ipv4.sysctl_tcp_retries2; @@ -270,6 +277,11 @@ static int tcp_write_timeout(struct sock *sk) return 1; } + if (sk_rethink_txhash(sk)) { + tp->timeout_rehash++; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH); + } + return 0; } @@ -349,6 +361,7 @@ static void tcp_probe_timer(struct sock *sk) if (tp->packets_out || !skb) { icsk->icsk_probes_out = 0; + icsk->icsk_probes_tstamp = 0; return; } @@ -360,13 +373,12 @@ static void tcp_probe_timer(struct sock *sk) * corresponding system limit. We also implement similar policy when * we use RTO to probe window in tcp_retransmit_timer(). 
*/ - if (icsk->icsk_user_timeout) { - u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out, - tcp_probe0_base(sk)); - - if (elapsed >= icsk->icsk_user_timeout) - goto abort; - } + if (!icsk->icsk_probes_tstamp) + icsk->icsk_probes_tstamp = tcp_jiffies32; + else if (icsk->icsk_user_timeout && + (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >= + msecs_to_jiffies(icsk->icsk_user_timeout)) + goto abort; max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9eeebd4a00542c..e37a2fa65c2944 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2553,7 +2553,8 @@ int udp_v4_early_demux(struct sk_buff *skb) */ if (!inet_sk(sk)->inet_daddr && in_dev) return ip_mc_validate_source(skb, iph->daddr, - iph->saddr, iph->tos, + iph->saddr, + iph->tos & IPTOS_RT_MASK, skb->dev, in_dev, &itag); } return 0; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8b6eb384bac7ca..4c881f5d9080c3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2466,8 +2466,9 @@ static void addrconf_add_mroute(struct net_device *dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 8, .fc_flags = RTF_UP, - .fc_type = RTN_UNICAST, + .fc_type = RTN_MULTICAST, .fc_nlinfo.nl_net = dev_net(dev), + .fc_protocol = RTPROT_KERNEL, }; ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 52c2f063529fbf..2b804fcebcc651 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -478,7 +478,6 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; - u8 *vaddr; int nfrags; int esph_offset; struct page *page; @@ -519,14 +518,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info page = pfrag->page; get_page(page); - vaddr = kmap_atomic(page); - - tail = vaddr + pfrag->offset; + tail = page_address(page) + pfrag->offset; 
esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); - kunmap_atomic(vaddr); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 605cdd38a919a7..f43e2755572511 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1025,6 +1025,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, { struct fib6_table *table = rt->fib6_table; + /* Flush all cached dst in exception table */ + rt6_flush_exceptions(rt); fib6_drop_pcpu_from(rt, table); if (rt->nh && !list_empty(&rt->nh_list)) @@ -1927,9 +1929,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; - /* Flush all cached dst in exception table */ - rt6_flush_exceptions(rt); - /* Reset round-robin state, if necessary */ if (rcu_access_pointer(fn->rr_ptr) == rt) fn->rr_ptr = NULL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 749ad72386b232..077d43af8226bd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -125,8 +125,43 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return -EINVAL; } +static int +ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, + struct sk_buff *skb, unsigned int mtu) +{ + struct sk_buff *segs, *nskb; + netdev_features_t features; + int ret = 0; + + /* Please see corresponding comment in ip_finish_output_gso + * describing the cases where GSO segment length exceeds the + * egress MTU. 
+ */ + features = netif_skb_features(skb); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR_OR_NULL(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + skb_list_walk_safe(segs, segs, nskb) { + int err; + + skb_mark_not_on_list(segs); + err = ip6_fragment(net, sk, segs, ip6_finish_output2); + if (err && ret == 0) + ret = err; + } + + return ret; +} + static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { + unsigned int mtu; + #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { @@ -135,7 +170,11 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff } #endif - if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + mtu = ip6_skb_dst_mtu(skb); + if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)) + return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + + if ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c4f532f4d31187..0d453fa9e327bd 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5e7983cb61546f..ff048cb8d80741 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1645,8 +1645,11 @@ static int ipip6_newlink(struct net 
*src_net, struct net_device *dev, } #ifdef CONFIG_IPV6_SIT_6RD - if (ipip6_netlink_6rd_parms(data, &ip6rd)) + if (ipip6_netlink_6rd_parms(data, &ip6rd)) { err = ipip6_tunnel_update_6rd(nt, &ip6rd); + if (err < 0) + unregister_netdevice_queue(dev, NULL); + } #endif return err; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 90470392fdaa77..de5cd3818690c5 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -120,18 +120,17 @@ static ssize_t aqm_write(struct file *file, { struct ieee80211_local *local = file->private_data; char buf[100]; - size_t len; - if (count > sizeof(buf)) + if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; - len = strlen(buf); - if (len > 0 && buf[len-1] == '\n') - buf[len-1] = 0; + if (count && buf[count - 1] == '\n') + buf[count - 1] = '\0'; + else + buf[count] = '\0'; if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1) return count; @@ -177,18 +176,17 @@ static ssize_t airtime_flags_write(struct file *file, { struct ieee80211_local *local = file->private_data; char buf[16]; - size_t len; - if (count > sizeof(buf)) + if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; - buf[sizeof(buf) - 1] = 0; - len = strlen(buf); - if (len > 0 && buf[len - 1] == '\n') - buf[len - 1] = 0; + if (count && buf[count - 1] == '\n') + buf[count - 1] = '\0'; + else + buf[count] = '\0'; if (kstrtou16(buf, 0, &local->airtime_flags)) return -EINVAL; @@ -237,20 +235,19 @@ static ssize_t aql_txq_limit_write(struct file *file, { struct ieee80211_local *local = file->private_data; char buf[100]; - size_t len; u32 ac, q_limit_low, q_limit_high, q_limit_low_old, q_limit_high_old; struct sta_info *sta; - if (count > sizeof(buf)) + if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; - buf[sizeof(buf) - 1] = 0; - len = strlen(buf); - if (len > 0 && buf[len - 1] == 
'\n') - buf[len - 1] = 0; + if (count && buf[count - 1] == '\n') + buf[count - 1] = '\0'; + else + buf[count] = '\0'; if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3) return -EINVAL; @@ -306,18 +303,17 @@ static ssize_t force_tx_status_write(struct file *file, { struct ieee80211_local *local = file->private_data; char buf[3]; - size_t len; - if (count > sizeof(buf)) + if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; - len = strlen(buf); - if (len > 0 && buf[len - 1] == '\n') - buf[len - 1] = 0; + if (count && buf[count - 1] == '\n') + buf[count - 1] = '\0'; + else + buf[count] = '\0'; if (buf[0] == '0' && buf[1] == '\0') local->force_tx_status = 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2a21226fb518a1..d6913784be2bde 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1082,6 +1082,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_FLUSH, IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN, IEEE80211_QUEUE_STOP_REASON_RESERVE_TID, + IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE, IEEE80211_QUEUE_STOP_REASONS, }; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 44154cc596cd40..f3c3557a9e4c4b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1654,6 +1654,10 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, if (ret) return ret; + ieee80211_stop_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE); + synchronize_net(); + ieee80211_do_stop(sdata, false); ieee80211_teardown_sdata(sdata); @@ -1676,6 +1680,8 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, err = ieee80211_do_open(&sdata->wdev, false); WARN(err, "type change: do_open returned %d", err); + ieee80211_wake_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE); return ret; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 
1e2e5a406d5875..98517423b0b76b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1758,7 +1758,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { sta->rx_stats.last_rx = jiffies; } else if (!ieee80211_is_s1g_beacon(hdr->frame_control) && - is_multicast_ether_addr(hdr->addr1)) { + !is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to * match the current local configuration when processed. @@ -4191,6 +4191,8 @@ void ieee80211_check_fast_rx(struct sta_info *sta) rcu_read_lock(); key = rcu_dereference(sta->ptk[sta->ptk_idx]); + if (!key) + key = rcu_dereference(sdata->default_unicast_key); if (key) { switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_TKIP: diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 56a4d0d20a267a..88868bf3005137 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -662,7 +662,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) if (!skip_hw && tx->key && tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) info->control.hw_key = &tx->key->conf; - } else if (!ieee80211_is_mgmt(hdr->frame_control) && tx->sta && + } else if (ieee80211_is_data_present(hdr->frame_control) && tx->sta && test_sta_flag(tx->sta, WLAN_STA_USES_ENCRYPTION)) { return TX_DROP; } @@ -3836,7 +3836,7 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw, * get immediately moved to the back of the list on the next * call to ieee80211_next_txq(). 
*/ - if (txqi->txq.sta && + if (txqi->txq.sta && local->airtime_flags && wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) list_add(&txqi->schedule_order, @@ -4278,7 +4278,6 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb, struct ethhdr *ehdr = (struct ethhdr *)skb->data; struct ieee80211_key *key; struct sta_info *sta; - bool offload = true; if (unlikely(skb->len < ETH_HLEN)) { kfree_skb(skb); @@ -4294,18 +4293,22 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb, if (unlikely(IS_ERR_OR_NULL(sta) || !sta->uploaded || !test_sta_flag(sta, WLAN_STA_AUTHORIZED) || - sdata->control_port_protocol == ehdr->h_proto)) - offload = false; - else if ((key = rcu_dereference(sta->ptk[sta->ptk_idx])) && - (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) || - key->conf.cipher == WLAN_CIPHER_SUITE_TKIP)) - offload = false; - - if (offload) - ieee80211_8023_xmit(sdata, dev, sta, key, skb); - else - ieee80211_subif_start_xmit(skb, dev); + sdata->control_port_protocol == ehdr->h_proto)) + goto skip_offload; + + key = rcu_dereference(sta->ptk[sta->ptk_idx]); + if (!key) + key = rcu_dereference(sdata->default_unicast_key); + + if (key && (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) || + key->conf.cipher == WLAN_CIPHER_SUITE_TKIP)) + goto skip_offload; + + ieee80211_8023_xmit(sdata, dev, sta, key, skb); + goto out; +skip_offload: + ieee80211_subif_start_xmit(skb, dev); out: rcu_read_unlock(); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index fb0e3a657d2d33..c3ca9737377428 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -465,12 +465,18 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) * IEEE80211-2016 specification makes higher bandwidth operation * possible on the TDLS link if the peers have wider bandwidth * capability. 
+ * + * However, in this case, and only if the TDLS peer is authorized, + * limit to the tdls_chandef so that the configuration here isn't + * wider than what's actually requested on the channel context. */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && - test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) - return bw; - - bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); + test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) && + test_sta_flag(sta, WLAN_STA_AUTHORIZED) && + sta->tdls_chandef.chan) + bw = min(bw, ieee80211_chan_width_to_rx_bw(sta->tdls_chandef.width)); + else + bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); return bw; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 88f2a7a0ccb86b..967ce9ccfc0da3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2081,6 +2081,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk, sock_reset_flag(nsk, SOCK_RCU_FREE); /* will be fully established after successful MPC subflow creation */ inet_sk_state_store(nsk, TCP_SYN_RECV); + + security_inet_csk_clone(nsk, req); bh_unlock_sock(nsk); /* keep a single reference */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 5b1f4ec66dd981..888ccc2d4e34b1 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1120,7 +1120,7 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, int payload, i, ret; /* Find the NCSI device */ - nd = ncsi_find_dev(dev); + nd = ncsi_find_dev(orig_dev); ndp = nd ? 
TO_NCSI_DEV_PRIV(nd) : NULL; if (!ndp) return -ENODEV; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 521e970be4028d..7cd1d31fb2b884 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -143,20 +143,6 @@ htable_size(u8 hbits) return hsize * sizeof(struct hbucket *) + sizeof(struct htable); } -/* Compute htable_bits from the user input parameter hashsize */ -static u8 -htable_bits(u32 hashsize) -{ - /* Assume that hashsize == 2^htable_bits */ - u8 bits = fls(hashsize - 1); - - if (jhash_size(bits) != hashsize) - /* Round up to the first 2^n value */ - bits = fls(hashsize); - - return bits; -} - #ifdef IP_SET_HASH_WITH_NETS #if IPSET_NET_COUNT > 1 #define __CIDR(cidr, i) (cidr[i]) @@ -644,7 +630,7 @@ mtype_resize(struct ip_set *set, bool retried) struct htype *h = set->data; struct htable *t, *orig; u8 htable_bits; - size_t dsize = set->dsize; + size_t hsize, dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; struct mtype_elem *tmp; @@ -668,14 +654,12 @@ mtype_resize(struct ip_set *set, bool retried) retry: ret = 0; htable_bits++; - if (!htable_bits) { - /* In case we have plenty of memory :-) */ - pr_warn("Cannot increase the hashsize of set %s further\n", - set->name); - ret = -IPSET_ERR_HASH_FULL; - goto out; - } - t = ip_set_alloc(htable_size(htable_bits)); + if (!htable_bits) + goto hbwarn; + hsize = htable_size(htable_bits); + if (!hsize) + goto hbwarn; + t = ip_set_alloc(hsize); if (!t) { ret = -ENOMEM; goto out; @@ -817,6 +801,12 @@ mtype_resize(struct ip_set *set, bool retried) if (ret == -EAGAIN) goto retry; goto out; + +hbwarn: + /* In case we have plenty of memory :-) */ + pr_warn("Cannot increase the hashsize of set %s further\n", set->name); + ret = -IPSET_ERR_HASH_FULL; + goto out; } /* Get the current number of elements and ext_size in the set */ @@ -1520,7 +1510,11 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, if (!h) return 
-ENOMEM; - hbits = htable_bits(hashsize); + /* Compute htable_bits from the user input parameter hashsize. + * Assume that hashsize == 2^htable_bits, + * otherwise round up to the first 2^n value. + */ + hbits = fls(hashsize - 1); hsize = htable_size(hbits); if (hsize == 0) { kfree(h); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 46c5557c1fecfd..0ee702d374b028 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -523,6 +523,9 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write, { int ret; + /* module_param hashsize could have changed value */ + nf_conntrack_htable_size_user = nf_conntrack_htable_size; + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret < 0 || !write) return ret; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index ea923f8cf9c425..b7c3c902290f14 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1174,6 +1174,7 @@ static int __init nf_nat_init(void) ret = register_pernet_subsys(&nat_net_ops); if (ret < 0) { nf_ct_extend_unregister(&nat_extend); + kvfree(nf_nat_bysource); return ret; } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 9af4f93c7f0e18..5c84a968dae292 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -123,7 +123,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); if (flags & ~NFT_DYNSET_F_INV) - return -EINVAL; + return -EOPNOTSUPP; if (flags & NFT_DYNSET_F_INV) priv->invert = true; } @@ -156,7 +156,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, timeout = 0; if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) - return -EINVAL; + return -EOPNOTSUPP; err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); if (err) @@ -170,7 +170,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if 
(tb[NFTA_DYNSET_SREG_DATA] != NULL) { if (!(set->flags & NFT_SET_MAP)) - return -EINVAL; + return -EOPNOTSUPP; if (set->dtype == NFT_DATA_VERDICT) return -EOPNOTSUPP; @@ -204,8 +204,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx, nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR, priv->expr->ops->size); if (set->flags & NFT_SET_TIMEOUT) { - if (timeout || set->timeout) + if (timeout || set->timeout) { + nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT); nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION); + } } priv->timeout = timeout; diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 37253d399c6b8d..0d5c422f87452f 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -115,6 +115,9 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) } cfg; int ret; + if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name)) + return -ENAMETOOLONG; + net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); mutex_lock(&xn->hash_lock); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 8709f3d4e7c4b5..bec7847f8eaac6 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -852,6 +852,7 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) if (!dev->polling) { device_unlock(&dev->dev); + nfc_put_device(dev); return -EINVAL; } diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 955c195ae14bcf..9c7eb8455ba8eb 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -105,7 +105,7 @@ static int rawsock_connect(struct socket *sock, struct sockaddr *_addr, if (addr->target_idx > dev->target_next_idx - 1 || addr->target_idx < dev->target_next_idx - dev->n_targets) { rc = -EINVAL; - goto error; + goto put_dev; } rc = nfc_activate_target(dev, addr->target_idx, addr->nfc_protocol); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 8df1964db33328..a0b033954ceacc 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -197,6 +197,7 @@ void 
rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_peer *peer = b->peer_backlog[tail]; + rxrpc_put_local(peer->local); kfree(peer); tail = (tail + 1) & (size - 1); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 667c44aa5a63cb..dc201363f2c485 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -430,7 +430,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) return; } - if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { + if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { unsigned long timo = READ_ONCE(call->next_req_timo); unsigned long now, expect_req_by; diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 2e8bd3b97301e6..979338a64c0ca5 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -1109,7 +1109,7 @@ static long rxrpc_read(const struct key *key, default: /* we have a ticket we can't encode */ pr_err("Unsupported key token type (%u)\n", token->security_index); - continue; + return -ENOPKG; } _debug("token[%u]: toksize=%u", ntoks, toksize); @@ -1224,7 +1224,9 @@ static long rxrpc_read(const struct key *key, break; default: - break; + pr_err("Unsupported key token type (%u)\n", + token->security_index); + return -ENOPKG; } ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==, diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1319986693fc88..84f932532db7dc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1272,6 +1272,10 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); + if (!nla_ok(nla_opt_msk, msk_depth)) { + NL_SET_ERR_MSG(extack, "Invalid nested attribute for masks"); + return -EINVAL; + } } nla_for_each_attr(nla_opt_key, nla_enc_key, @@ -1307,9 +1311,6 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, NL_SET_ERR_MSG(extack, "Key and mask 
miss aligned"); return -EINVAL; } - - if (msk_depth) - nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); break; case TCA_FLOWER_KEY_ENC_OPTS_VXLAN: if (key->enc_opts.dst_opt_type) { @@ -1340,9 +1341,6 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); return -EINVAL; } - - if (msk_depth) - nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); break; case TCA_FLOWER_KEY_ENC_OPTS_ERSPAN: if (key->enc_opts.dst_opt_type) { @@ -1373,14 +1371,20 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); return -EINVAL; } - - if (msk_depth) - nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); break; default: NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); return -EINVAL; } + + if (!msk_depth) + continue; + + if (!nla_ok(nla_opt_msk, msk_depth)) { + NL_SET_ERR_MSG(extack, "A mask attribute is invalid"); + return -EINVAL; + } + nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); } return 0; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 78bec347b8b66f..c4007b9cd16d6a 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -366,9 +366,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tb[TCA_TCINDEX_MASK]) cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); - if (tb[TCA_TCINDEX_SHIFT]) + if (tb[TCA_TCINDEX_SHIFT]) { cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); - + if (cp->shift > 16) { + err = -EINVAL; + goto errout; + } + } if (!cp->hash) { /* Hash not specified, use perfect hash if the upper limit * of the hashing index is below the threshold. 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2a76a2f5ed88cd..5e8e49c4ab5caf 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -412,7 +412,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, { struct qdisc_rate_table *rtab; - if (tab == NULL || r->rate == 0 || r->cell_log == 0 || + if (tab == NULL || r->rate == 0 || + r->cell_log == 0 || r->cell_log >= 32 || nla_len(tab) != TC_RTAB_SIZE) { NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching"); return NULL; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index bd618b00d31932..50f680f03a547e 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -362,7 +362,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, ctl = nla_data(tb[TCA_CHOKE_PARMS]); - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) return -EINVAL; if (ctl->limit > CHOKE_MAX_QUEUE) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 8599c6f31b057f..e0bc77533acc39 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -480,7 +480,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, struct gred_sched *table = qdisc_priv(sch); struct gred_sched_data *q = table->tab[dp]; - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) { + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) { NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters"); return -EINVAL; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index e89fab6ccb34f7..b4ae34d7aa965d 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -250,7 +250,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, max_P = tb[TCA_RED_MAX_P] ? 
nla_get_u32(tb[TCA_RED_MAX_P]) : 0; ctl = nla_data(tb[TCA_RED_PARMS]); - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) return -EINVAL; err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index bca2be57d9fc17..b25e51440623bc 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -647,7 +647,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) } if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, - ctl_v1->Wlog)) + ctl_v1->Wlog, ctl_v1->Scell_log)) return -EINVAL; if (ctl_v1 && ctl_v1->qth_min) { p = kmalloc(sizeof(*p), GFP_KERNEL); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b0ad7687ee2c85..c966c05a0be923 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1596,6 +1596,22 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, return err; } +static void taprio_reset(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int i; + + hrtimer_cancel(&q->advance_timer); + if (q->qdiscs) { + for (i = 0; i < dev->num_tx_queues; i++) + if (q->qdiscs[i]) + qdisc_reset(q->qdiscs[i]); + } + sch->qstats.backlog = 0; + sch->q.qlen = 0; +} + static void taprio_destroy(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); @@ -1606,12 +1622,11 @@ static void taprio_destroy(struct Qdisc *sch) list_del(&q->taprio_list); spin_unlock(&taprio_list_lock); - hrtimer_cancel(&q->advance_timer); taprio_disable_offload(dev, q, NULL); if (q->qdiscs) { - for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) + for (i = 0; i < dev->num_tx_queues; i++) qdisc_put(q->qdiscs[i]); kfree(q->qdiscs); @@ -1953,6 +1968,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .init = taprio_init, .change = taprio_change, .destroy = taprio_destroy, + .reset = taprio_reset, .peek = taprio_peek, 
.dequeue = taprio_dequeue, .enqueue = taprio_enqueue, diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 010dcb876f9d72..6e4dbd577a39fa 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -185,7 +185,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, scope_id = dev->ifindex; dev_put(dev); } else { - if (kstrtou32(p, 10, &scope_id) == 0) { + if (kstrtou32(p, 10, &scope_id) != 0) { kfree(p); return 0; } diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index fd9bca24272427..56029e3af6ff0b 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -128,13 +128,13 @@ static int do_xprt_debugfs(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *n return 0; len = snprintf(name, sizeof(name), "../../rpc_xprt/%s", xprt->debugfs->d_name.name); - if (len > sizeof(name)) + if (len >= sizeof(name)) return -1; if (*nump == 0) strcpy(link, "xprt"); else { len = snprintf(link, sizeof(link), "xprt%d", *nump); - if (len > sizeof(link)) + if (len >= sizeof(link)) return -1; } debugfs_create_symlink(link, clnt->cl_debugfs, name); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f06d7c315017cd..cf702a5f7fe5de 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -675,6 +675,23 @@ struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue) } EXPORT_SYMBOL_GPL(rpc_wake_up_next); +/** + * rpc_wake_up_locked - wake up all rpc_tasks + * @queue: rpc_wait_queue on which the tasks are sleeping + * + */ +static void rpc_wake_up_locked(struct rpc_wait_queue *queue) +{ + struct rpc_task *task; + + for (;;) { + task = __rpc_find_next_queued(queue); + if (task == NULL) + break; + rpc_wake_up_task_queue_locked(queue, task); + } +} + /** * rpc_wake_up - wake up all rpc_tasks * @queue: rpc_wait_queue on which the tasks are sleeping @@ -683,25 +700,28 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next); */ void rpc_wake_up(struct rpc_wait_queue *queue) { - struct list_head *head; - spin_lock(&queue->lock); - head = &queue->tasks[queue->maxpriority]; + 
rpc_wake_up_locked(queue); + spin_unlock(&queue->lock); +} +EXPORT_SYMBOL_GPL(rpc_wake_up); + +/** + * rpc_wake_up_status_locked - wake up all rpc_tasks and set their status value. + * @queue: rpc_wait_queue on which the tasks are sleeping + * @status: status value to set + */ +static void rpc_wake_up_status_locked(struct rpc_wait_queue *queue, int status) +{ + struct rpc_task *task; + for (;;) { - while (!list_empty(head)) { - struct rpc_task *task; - task = list_first_entry(head, - struct rpc_task, - u.tk_wait.list); - rpc_wake_up_task_queue_locked(queue, task); - } - if (head == &queue->tasks[0]) + task = __rpc_find_next_queued(queue); + if (task == NULL) break; - head--; + rpc_wake_up_task_queue_set_status_locked(queue, task, status); } - spin_unlock(&queue->lock); } -EXPORT_SYMBOL_GPL(rpc_wake_up); /** * rpc_wake_up_status - wake up all rpc_tasks and set their status value. @@ -712,23 +732,8 @@ EXPORT_SYMBOL_GPL(rpc_wake_up); */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { - struct list_head *head; - spin_lock(&queue->lock); - head = &queue->tasks[queue->maxpriority]; - for (;;) { - while (!list_empty(head)) { - struct rpc_task *task; - task = list_first_entry(head, - struct rpc_task, - u.tk_wait.list); - task->tk_status = status; - rpc_wake_up_task_queue_locked(queue, task); - } - if (head == &queue->tasks[0]) - break; - head--; - } + rpc_wake_up_status_locked(queue, status); spin_unlock(&queue->lock); } EXPORT_SYMBOL_GPL(rpc_wake_up_status); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c2752e2b9ce34e..4404c491eb3882 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1062,6 +1062,90 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) return 0; /* record not complete */ } +static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec, + int flags) +{ + return kernel_sendpage(sock, virt_to_page(vec->iov_base), + offset_in_page(vec->iov_base), + vec->iov_len, flags); +} + +/* + * 
kernel_sendpage() is used exclusively to reduce the number of + * copy operations in this path. Therefore the caller must ensure + * that the pages backing @xdr are unchanging. + * + * In addition, the logic assumes that * .bv_len is never larger + * than PAGE_SIZE. + */ +static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, + struct xdr_buf *xdr, rpc_fraghdr marker, + unsigned int *sentp) +{ + const struct kvec *head = xdr->head; + const struct kvec *tail = xdr->tail; + struct kvec rm = { + .iov_base = &marker, + .iov_len = sizeof(marker), + }; + int flags, ret; + + *sentp = 0; + xdr_alloc_bvec(xdr, GFP_KERNEL); + + msg->msg_flags = MSG_MORE; + ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != rm.iov_len) + return -EAGAIN; + + flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0; + ret = svc_tcp_send_kvec(sock, head, flags); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != head->iov_len) + goto out; + + if (xdr->page_len) { + unsigned int offset, len, remaining; + struct bio_vec *bvec; + + bvec = xdr->bvec; + offset = xdr->page_base; + remaining = xdr->page_len; + flags = MSG_MORE | MSG_SENDPAGE_NOTLAST; + while (remaining > 0) { + if (remaining <= PAGE_SIZE && tail->iov_len == 0) + flags = 0; + len = min(remaining, bvec->bv_len); + ret = kernel_sendpage(sock, bvec->bv_page, + bvec->bv_offset + offset, + len, flags); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != len) + goto out; + remaining -= len; + offset = 0; + bvec++; + } + } + + if (tail->iov_len) { + ret = svc_tcp_send_kvec(sock, tail, 0); + if (ret < 0) + return ret; + *sentp += ret; + } + +out: + return 0; +} + /** * svc_tcp_sendto - Send out a reply on a TCP socket * @rqstp: completed svc_rqst @@ -1089,7 +1173,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) mutex_lock(&xprt->xpt_mutex); if (svc_xprt_is_dead(xprt)) goto out_notconn; - err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 
0, marker, &sent); + err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent); xdr_free_bvec(xdr); trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f6c17e75f20ede..57f09ea3ef2af7 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -151,31 +151,64 @@ int xprt_unregister_transport(struct xprt_class *transport) } EXPORT_SYMBOL_GPL(xprt_unregister_transport); +static void +xprt_class_release(const struct xprt_class *t) +{ + module_put(t->owner); +} + +static const struct xprt_class * +xprt_class_find_by_netid_locked(const char *netid) +{ + const struct xprt_class *t; + unsigned int i; + + list_for_each_entry(t, &xprt_list, list) { + for (i = 0; t->netid[i][0] != '\0'; i++) { + if (strcmp(t->netid[i], netid) != 0) + continue; + if (!try_module_get(t->owner)) + continue; + return t; + } + } + return NULL; +} + +static const struct xprt_class * +xprt_class_find_by_netid(const char *netid) +{ + const struct xprt_class *t; + + spin_lock(&xprt_list_lock); + t = xprt_class_find_by_netid_locked(netid); + if (!t) { + spin_unlock(&xprt_list_lock); + request_module("rpc%s", netid); + spin_lock(&xprt_list_lock); + t = xprt_class_find_by_netid_locked(netid); + } + spin_unlock(&xprt_list_lock); + return t; +} + /** * xprt_load_transport - load a transport implementation - * @transport_name: transport to load + * @netid: transport to load * * Returns: * 0: transport successfully loaded * -ENOENT: transport module not available */ -int xprt_load_transport(const char *transport_name) +int xprt_load_transport(const char *netid) { - struct xprt_class *t; - int result; + const struct xprt_class *t; - result = 0; - spin_lock(&xprt_list_lock); - list_for_each_entry(t, &xprt_list, list) { - if (strcmp(t->name, transport_name) == 0) { - spin_unlock(&xprt_list_lock); - goto out; - } - } - spin_unlock(&xprt_list_lock); - result = request_module("xprt%s", transport_name); 
-out: - return result; + t = xprt_class_find_by_netid(netid); + if (!t) + return -ENOENT; + xprt_class_release(t); + return 0; } EXPORT_SYMBOL_GPL(xprt_load_transport); diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c index 620327c01302ce..45c5b41ac8dc90 100644 --- a/net/sunrpc/xprtrdma/module.c +++ b/net/sunrpc/xprtrdma/module.c @@ -24,6 +24,7 @@ MODULE_DESCRIPTION("RPC/RDMA Transport"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("svcrdma"); MODULE_ALIAS("xprtrdma"); +MODULE_ALIAS("rpcrdma6"); static void __exit rpc_rdma_cleanup(void) { diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 0f5120c7668ff9..c48536f2121fb2 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -179,6 +179,31 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt, r_xprt->rx_ep->re_max_inline_recv; } +/* ACL likes to be lazy in allocating pages. For TCP, these + * pages can be allocated during receive processing. Not true + * for RDMA, which must always provision receive buffers + * up front. + */ +static noinline int +rpcrdma_alloc_sparse_pages(struct xdr_buf *buf) +{ + struct page **ppages; + int len; + + len = buf->page_len; + ppages = buf->pages + (buf->page_base >> PAGE_SHIFT); + while (len > 0) { + if (!*ppages) + *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN); + if (!*ppages) + return -ENOBUFS; + ppages++; + len -= PAGE_SIZE; + } + + return 0; +} + /* Split @vec on page boundaries into SGEs. FMR registers pages, not * a byte range. Other modes coalesce these SGEs into a single MR * when they can. @@ -233,15 +258,6 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); page_base = offset_in_page(xdrbuf->page_base); while (len) { - /* ACL likes to be lazy in allocating pages - ACLs - * are small by default but can get huge. 
- */ - if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) { - if (!*ppages) - *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN); - if (!*ppages) - return -ENOBUFS; - } seg->mr_page = *ppages; seg->mr_offset = (char *)page_base; seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len); @@ -867,6 +883,12 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) __be32 *p; int ret; + if (unlikely(rqst->rq_rcv_buf.flags & XDRBUF_SPARSE_PAGES)) { + ret = rpcrdma_alloc_sparse_pages(&rqst->rq_rcv_buf); + if (ret) + return ret; + } + rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf), rqst); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8915e42240d38b..035060c05fd5aa 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -768,6 +768,7 @@ static struct xprt_class xprt_rdma = { .owner = THIS_MODULE, .ident = XPRT_TRANSPORT_RDMA, .setup = xprt_setup_rdma, + .netid = { "rdma", "rdma6", "" }, }; void xprt_rdma_cleanup(void) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7090bbee0ec59d..c56a66cdf4ac80 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -433,7 +433,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, if (ret <= 0) goto sock_err; xs_flush_bvec(buf->bvec, ret, seek + buf->page_base); - offset += ret - buf->page_base; + ret -= buf->page_base; + offset += ret; if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) goto out; if (ret != want) @@ -3059,6 +3060,7 @@ static struct xprt_class xs_local_transport = { .owner = THIS_MODULE, .ident = XPRT_TRANSPORT_LOCAL, .setup = xs_setup_local, + .netid = { "" }, }; static struct xprt_class xs_udp_transport = { @@ -3067,6 +3069,7 @@ static struct xprt_class xs_udp_transport = { .owner = THIS_MODULE, .ident = XPRT_TRANSPORT_UDP, .setup = xs_setup_udp, + .netid = { "udp", "udp6", "" }, }; static struct xprt_class xs_tcp_transport = { @@ 
-3075,6 +3078,7 @@ static struct xprt_class xs_tcp_transport = { .owner = THIS_MODULE, .ident = XPRT_TRANSPORT_TCP, .setup = xs_setup_tcp, + .netid = { "tcp", "tcp6", "" }, }; static struct xprt_class xs_bc_tcp_transport = { @@ -3083,6 +3087,7 @@ static struct xprt_class xs_bc_tcp_transport = { .owner = THIS_MODULE, .ident = XPRT_TRANSPORT_BC_TCP, .setup = xs_setup_bc_tcp, + .netid = { "" }, }; /** diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 23d8685453627c..2c1ffc9ba2eb25 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -460,10 +460,11 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, extack = switchdev_notifier_info_to_extack(&port_obj_info->info); if (check_cb(dev)) { - /* This flag is only checked if the return value is success. */ - port_obj_info->handled = true; - return add_cb(dev, port_obj_info->obj, port_obj_info->trans, - extack); + err = add_cb(dev, port_obj_info->obj, port_obj_info->trans, + extack); + if (err != -EOPNOTSUPP) + port_obj_info->handled = true; + return err; } /* Switch ports might be stacked under e.g. a LAG. Ignore the @@ -515,9 +516,10 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev, int err = -EOPNOTSUPP; if (check_cb(dev)) { - /* This flag is only checked if the return value is success. */ - port_obj_info->handled = true; - return del_cb(dev, port_obj_info->obj); + err = del_cb(dev, port_obj_info->obj); + if (err != -EOPNOTSUPP) + port_obj_info->handled = true; + return err; } /* Switch ports might be stacked under e.g. a LAG. 
Ignore the @@ -568,9 +570,10 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev, int err = -EOPNOTSUPP; if (check_cb(dev)) { - port_attr_info->handled = true; - return set_cb(dev, port_attr_info->attr, - port_attr_info->trans); + err = set_cb(dev, port_attr_info->attr, port_attr_info->trans); + if (err != -EOPNOTSUPP) + port_attr_info->handled = true; + return err; } /* Switch ports might be stacked under e.g. a LAG. Ignore the diff --git a/net/tipc/link.c b/net/tipc/link.c index 06b880da2a8ea2..c92e6984933cb7 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -996,7 +996,6 @@ void tipc_link_reset(struct tipc_link *l) int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) { - struct tipc_msg *hdr = buf_msg(skb_peek(list)); struct sk_buff_head *backlogq = &l->backlogq; struct sk_buff_head *transmq = &l->transmq; struct sk_buff *skb, *_skb; @@ -1004,13 +1003,18 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; int pkt_cnt = skb_queue_len(list); - int imp = msg_importance(hdr); unsigned int mss = tipc_link_mss(l); unsigned int cwin = l->window; unsigned int mtu = l->mtu; + struct tipc_msg *hdr; bool new_bundle; int rc = 0; + int imp; + + if (pkt_cnt <= 0) + return 0; + hdr = buf_msg(skb_peek(list)); if (unlikely(msg_size(hdr) > mtu)) { pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", skb_queue_len(list), msg_user(hdr), @@ -1019,6 +1023,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return -EMSGSIZE; } + imp = msg_importance(hdr); /* Allow oversubscription of one data msg per source at congestion */ if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { if (imp == TIPC_SYSTEM_IMPORTANCE) { diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d10916ab452679..f64e681493a598 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -997,9 +997,12 @@ static __poll_t 
vsock_poll(struct file *file, struct socket *sock, mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; } else if (sock->type == SOCK_STREAM) { - const struct vsock_transport *transport = vsk->transport; + const struct vsock_transport *transport; + lock_sock(sk); + transport = vsk->transport; + /* Listening sockets that have connections in their accept * queue can be read. */ @@ -1082,10 +1085,11 @@ static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = 0; sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; lock_sock(sk); + transport = vsk->transport; + err = vsock_auto_bind(vsk); if (err) goto out; @@ -1544,10 +1548,11 @@ static int vsock_stream_setsockopt(struct socket *sock, err = 0; sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; lock_sock(sk); + transport = vsk->transport; + switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: COPY_IN(val); @@ -1680,7 +1685,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; total_written = 0; err = 0; @@ -1689,6 +1693,8 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); + transport = vsk->transport; + /* Callers should not provide a destination with stream sockets. */ if (msg->msg_namelen) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; @@ -1823,11 +1829,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; err = 0; lock_sock(sk); + transport = vsk->transport; + if (!transport || sk->sk_state != TCP_ESTABLISHED) { /* Recvmsg is supposed to return 0 if a peer performs an * orderly shutdown. 
Differentiate between that case and when a diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 27026f587fa618..f620acd2a0f5e2 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -21,6 +21,7 @@ config CFG80211 tristate "cfg80211 - wireless configuration API" depends on RFKILL || !RFKILL select FW_LOADER + select CRC32 # may need to update this when certificates are changed and are # using a different algorithm, though right now they shouldn't # (this is here rather than below to allow it to be a module) diff --git a/net/wireless/core.h b/net/wireless/core.h index e3e9686859d459..7df91f94021241 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -433,6 +433,8 @@ void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); +bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev, + int key_idx, bool pairwise); int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f67ddf2cebcbe7..535e34a84d651c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4260,9 +4260,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (key.idx < 0) - return -EINVAL; - if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -4278,6 +4275,10 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) key.type != NL80211_KEYTYPE_GROUP) return -EINVAL; + if (!cfg80211_valid_key_idx(rdev, key.idx, + key.type == NL80211_KEYTYPE_PAIRWISE)) + return -EINVAL; + if (!rdev->ops->del_key) return -EOPNOTSUPP; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 8d0e49c46db37c..3409f37d838b30 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -694,7 +694,7 @@ static void 
cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request, static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap, struct cfg80211_scan_request *request) { - u8 i; + int i; u32 s_ssid; for (i = 0; i < request->n_ssids; i++) { diff --git a/net/wireless/util.c b/net/wireless/util.c index f01746894a4e92..e4247c35435668 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -272,18 +272,53 @@ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher) return false; } -int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, - struct key_params *params, int key_idx, - bool pairwise, const u8 *mac_addr) +static bool +cfg80211_igtk_cipher_supported(struct cfg80211_registered_device *rdev) { - int max_key_idx = 5; + struct wiphy *wiphy = &rdev->wiphy; + int i; + + for (i = 0; i < wiphy->n_cipher_suites; i++) { + switch (wiphy->cipher_suites[i]) { + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + return true; + } + } + + return false; +} - if (wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_BEACON_PROTECTION) || - wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) +bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev, + int key_idx, bool pairwise) +{ + int max_key_idx; + + if (pairwise) + max_key_idx = 3; + else if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION) || + wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) max_key_idx = 7; + else if (cfg80211_igtk_cipher_supported(rdev)) + max_key_idx = 5; + else + max_key_idx = 3; + if (key_idx < 0 || key_idx > max_key_idx) + return false; + + return true; +} + +int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, + struct key_params *params, int key_idx, + bool pairwise, const u8 *mac_addr) +{ + if 
(!cfg80211_valid_key_idx(rdev, key_idx, pairwise)) return -EINVAL; if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 69102fda9ebd46..76a80a41615bef 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -896,8 +896,9 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, int call_commit_handler(struct net_device *dev) { #ifdef CONFIG_WIRELESS_EXT - if ((netif_running(dev)) && - (dev->wireless_handlers->standard[0] != NULL)) + if (netif_running(dev) && + dev->wireless_handlers && + dev->wireless_handlers->standard[0]) /* Call the commit handler on the driver */ return dev->wireless_handlers->standard[0](dev, NULL, NULL, NULL); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 62504471fd207c..52fd1f96b241eb 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -107,9 +107,9 @@ EXPORT_SYMBOL(xsk_get_pool_from_qid); void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) { - if (queue_id < dev->real_num_rx_queues) + if (queue_id < dev->num_rx_queues) dev->_rx[queue_id].pool = NULL; - if (queue_id < dev->real_num_tx_queues) + if (queue_id < dev->num_tx_queues) dev->_tx[queue_id].pool = NULL; } @@ -364,9 +364,9 @@ static void xsk_destruct_skb(struct sk_buff *skb) struct xdp_sock *xs = xdp_sk(skb->sk); unsigned long flags; - spin_lock_irqsave(&xs->tx_completion_lock, flags); + spin_lock_irqsave(&xs->pool->cq_lock, flags); xskq_prod_submit_addr(xs->pool->cq, addr); - spin_unlock_irqrestore(&xs->tx_completion_lock, flags); + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); sock_wfree(skb); } @@ -378,6 +378,7 @@ static int xsk_generic_xmit(struct sock *sk) bool sent_frame = false; struct xdp_desc desc; struct sk_buff *skb; + unsigned long flags; int err = 0; mutex_lock(&xs->mutex); @@ -409,10 +410,13 @@ static int xsk_generic_xmit(struct sock *sk) * if there is space in it. This avoids having to implement * any buffering in the Tx path. 
*/ + spin_lock_irqsave(&xs->pool->cq_lock, flags); if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) { + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); kfree_skb(skb); goto out; } + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); skb->dev = xs->dev; skb->priority = sk->sk_priority; @@ -424,6 +428,9 @@ static int xsk_generic_xmit(struct sock *sk) if (err == NETDEV_TX_BUSY) { /* Tell user-space to retry the send */ skb->destructor = sock_wfree; + spin_lock_irqsave(&xs->pool->cq_lock, flags); + xskq_prod_cancel(xs->pool->cq); + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); /* Free skb without triggering the perf drop trace */ consume_skb(skb); err = -EAGAIN; @@ -772,6 +779,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } } + /* FQ and CQ are now owned by the buffer pool and cleaned up with it. */ + xs->fq_tmp = NULL; + xs->cq_tmp = NULL; + xs->dev = dev; xs->zc = xs->umem->zc; xs->queue_id = qid; @@ -1193,7 +1204,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, xs->state = XSK_READY; mutex_init(&xs->mutex); spin_lock_init(&xs->rx_lock); - spin_lock_init(&xs->tx_completion_lock); INIT_LIST_HEAD(&xs->map_list); spin_lock_init(&xs->map_list_lock); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index d5adeee9d5d916..2ef6f926610eea 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -71,12 +71,11 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xsk_tx_list); spin_lock_init(&pool->xsk_tx_list_lock); + spin_lock_init(&pool->cq_lock); refcount_set(&pool->users, 1); pool->fq = xs->fq_tmp; pool->cq = xs->cq_tmp; - xs->fq_tmp = NULL; - xs->cq_tmp = NULL; for (i = 0; i < pool->free_heads_cnt; i++) { xskb = &pool->heads[i]; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 9e71b9f27679b9..ef6de0fb4e312d 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -286,6 
+286,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q) return !free_entries; } +static inline void xskq_prod_cancel(struct xsk_queue *q) +{ + q->cached_prod--; +} + static inline int xskq_prod_reserve(struct xsk_queue *q) { if (xskq_prod_is_full(q)) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 37456d022cfa3a..61e6220ddd5aea 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -660,7 +660,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) /* only the first xfrm gets the encap type */ encap_type = 0; - if (async && x->repl->recheck(x, skb, seq)) { + if (x->repl->recheck(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d622c2548d2295..b74f28cabe24f2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -793,15 +793,22 @@ static int xfrm_policy_addr_delta(const xfrm_address_t *a, const xfrm_address_t *b, u8 prefixlen, u16 family) { + u32 ma, mb, mask; unsigned int pdw, pbi; int delta = 0; switch (family) { case AF_INET: - if (sizeof(long) == 4 && prefixlen == 0) - return ntohl(a->a4) - ntohl(b->a4); - return (ntohl(a->a4) & ((~0UL << (32 - prefixlen)))) - - (ntohl(b->a4) & ((~0UL << (32 - prefixlen)))); + if (prefixlen == 0) + return 0; + mask = ~0U << (32 - prefixlen); + ma = ntohl(a->a4) & mask; + mb = ntohl(b->a4) & mask; + if (ma < mb) + delta = -1; + else if (ma > mb) + delta = 1; + break; case AF_INET6: pdw = prefixlen >> 5; pbi = prefixlen & 0x1f; @@ -812,10 +819,13 @@ static int xfrm_policy_addr_delta(const xfrm_address_t *a, return delta; } if (pbi) { - u32 mask = ~0u << (32 - pbi); - - delta = (ntohl(a->a6[pdw]) & mask) - - (ntohl(b->a6[pdw]) & mask); + mask = ~0U << (32 - pbi); + ma = ntohl(a->a6[pdw]) & mask; + mb = ntohl(b->a6[pdw]) & mask; + if (ma < mb) + delta = -1; + else if (ma > mb) + delta = 1; } break; default: @@ -3078,8 +3088,8 @@ struct dst_entry 
*xfrm_lookup_with_ifid(struct net *net, xflo.flags = flags; /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || - !net->xfrm.policy_count[XFRM_POLICY_OUT]) + if (!if_id && ((dst_orig->flags & DST_NOXFRM) || + !net->xfrm.policy_count[XFRM_POLICY_OUT])) goto nopol; xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id); diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh old mode 100644 new mode 100755 index 090b96eaf7f76f..0eda9754f50b81 --- a/samples/bpf/lwt_len_hist.sh +++ b/samples/bpf/lwt_len_hist.sh @@ -8,6 +8,8 @@ VETH1=tst_lwt1b TRACE_ROOT=/sys/kernel/debug/tracing function cleanup { + # To reset saved histogram, remove pinned map + rm /sys/fs/bpf/tc/globals/lwt_len_hist_map ip route del 192.168.253.2/32 dev $VETH0 2> /dev/null ip link del $VETH0 2> /dev/null ip link del $VETH1 2> /dev/null diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh old mode 100644 new mode 100755 diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 1149e94ca32fd3..33c58de58626c6 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1250,6 +1250,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size) { complete_tx_only(xsk, batch_size); + if (benchmark_done) + return; } for (i = 0; i < batch_size; i++) { diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index fab38b493cef79..0ad235ee96f912 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4384,7 +4384,7 @@ sub process { $fix) { fix_delete_line($fixlinenr, $rawline); my $fixed_line = $rawline; - $fixed_line =~ /(^..*$Type\s*$Ident\(.*\)\s*){(.*)$/; + $fixed_line =~ /(^..*$Type\s*$Ident\(.*\)\s*)\{(.*)$/; my $line1 = $1; my $line2 = $2; fix_insert_line($fixlinenr, ltrim($line1)); diff --git a/scripts/depmod.sh b/scripts/depmod.sh index e083bcae343f3e..3643b4f896eded 100755 --- a/scripts/depmod.sh +++ 
b/scripts/depmod.sh @@ -15,6 +15,8 @@ if ! test -r System.map ; then exit 0 fi +# legacy behavior: "depmod" in /sbin, no /sbin in PATH +PATH="$PATH:/sbin" if [ -z $(command -v $DEPMOD) ]; then echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2 echo "This is probably in the kmod package." >&2 diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c index 0243086fb16854..0590f86df6e40c 100644 --- a/scripts/kconfig/preprocess.c +++ b/scripts/kconfig/preprocess.c @@ -114,7 +114,7 @@ static char *do_error_if(int argc, char *argv[]) if (!strcmp(argv[0], "y")) pperror("%s", argv[1]); - return NULL; + return xstrdup(""); } static char *do_filename(int argc, char *argv[]) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index f699cf05d40985..6325bec3f66f85 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1390,7 +1390,7 @@ sub dump_enum($$) { $members = $2; } - if ($declaration_name) { + if ($members) { my %_members; $members =~ s/\s+$//; @@ -1431,7 +1431,7 @@ sub dump_enum($$) { } } -my $typedef_type = qr { ((?:\s+[\w\*]+){1,8})\s* }x; +my $typedef_type = qr { ((?:\s+[\w\*]+\b){1,8})\s* }x; my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x; my $typedef_args = qr { \s*\((.*)\); }x; diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 21989fa0c10741..f6a7e9643b546b 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -537,7 +537,7 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) loff_t i_size; int rc; struct file *f = file; - bool new_file_instance = false, modified_mode = false; + bool new_file_instance = false; /* * For consistency, fail file's opened with the O_DIRECT flag on @@ -555,18 +555,10 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL); flags |= O_RDONLY; f = dentry_open(&file->f_path, flags, file->f_cred); - if (IS_ERR(f)) { - /* - * Cannot 
open the file again, lets modify f_mode - * of original and continue - */ - pr_info_ratelimited("Unable to reopen file for reading.\n"); - f = file; - f->f_mode |= FMODE_READ; - modified_mode = true; - } else { - new_file_instance = true; - } + if (IS_ERR(f)) + return PTR_ERR(f); + + new_file_instance = true; } i_size = i_size_read(file_inode(f)); @@ -581,8 +573,6 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) out: if (new_file_instance) fput(f); - else if (modified_mode) - f->f_mode &= ~FMODE_READ; return rc; } diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 53d0d183db8f88..08d5ef49f2e479 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -278,7 +278,9 @@ static void dump_common_audit_data(struct audit_buffer *ab, struct inode *inode; audit_log_format(ab, " name="); + spin_lock(&a->u.dentry->d_lock); audit_log_untrustedstring(ab, a->u.dentry->d_name.name); + spin_unlock(&a->u.dentry->d_lock); inode = d_backing_inode(a->u.dentry); if (inode) { @@ -297,8 +299,9 @@ static void dump_common_audit_data(struct audit_buffer *ab, dentry = d_find_alias(inode); if (dentry) { audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, - dentry->d_name.name); + spin_lock(&dentry->d_lock); + audit_log_untrustedstring(ab, dentry->d_name.name); + spin_unlock(&dentry->d_lock); dput(dentry); } audit_log_format(ab, " dev="); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6b1826fc3658e3..c46312710e73ec 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1451,7 +1451,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent * inode_doinit with a dentry, before these inodes could * be used again by userspace. */ - goto out; + goto out_invalid; } rc = inode_doinit_use_xattr(inode, dentry, sbsec->def_sid, @@ -1508,7 +1508,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent * could be used again by userspace. 
*/ if (!dentry) - goto out; + goto out_invalid; rc = selinux_genfs_get_sid(dentry, sclass, sbsec->flags, &sid); if (rc) { @@ -1533,11 +1533,10 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent out: spin_lock(&isec->lock); if (isec->initialized == LABEL_PENDING) { - if (!sid || rc) { + if (rc) { isec->initialized = LABEL_INVALID; goto out_unlock; } - isec->initialized = LABEL_INITIALIZED; isec->sid = sid; } @@ -1545,6 +1544,15 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent out_unlock: spin_unlock(&isec->lock); return rc; + +out_invalid: + spin_lock(&isec->lock); + if (isec->initialized == LABEL_PENDING) { + isec->initialized = LABEL_INVALID; + isec->sid = sid; + } + spin_unlock(&isec->lock); + return 0; } /* Convert a Linux signal to an access vector. */ diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index efe2406a39609d..7eabb448acab42 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -688,9 +688,10 @@ bool smack_privileged_cred(int cap, const struct cred *cred) bool smack_privileged(int cap) { /* - * All kernel tasks are privileged + * Kernel threads may not have credentials we can use. + * The io_uring kernel threads do have reliable credentials. 
*/ - if (unlikely(current->flags & PF_KTHREAD)) + if ((current->flags & (PF_KTHREAD | PF_IO_WORKER)) == PF_KTHREAD) return true; return smack_privileged_cred(cap, current_cred()); diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 0aeeb6244ff6c5..0f335162f87c71 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -77,7 +77,8 @@ static void snd_malloc_dev_iram(struct snd_dma_buffer *dmab, size_t size) /* Assign the pool into private_data field */ dmab->private_data = pool; - dmab->area = gen_pool_dma_alloc(pool, size, &dmab->addr); + dmab->area = gen_pool_dma_alloc_align(pool, size, &dmab->addr, + PAGE_SIZE); } /** diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 327ec42a36b098..142fc751a84770 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -693,6 +693,8 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, oss_buffer_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, NULL)) * oss_frame_size; + if (!oss_buffer_size) + return -EINVAL; oss_buffer_size = rounddown_pow_of_two(oss_buffer_size); if (atomic_read(&substream->mmap_count)) { if (oss_buffer_size > runtime->oss.mmap_bytes) @@ -728,17 +730,21 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, min_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - min_period_size *= oss_frame_size; - min_period_size = roundup_pow_of_two(min_period_size); - if (oss_period_size < min_period_size) - oss_period_size = min_period_size; + if (min_period_size) { + min_period_size *= oss_frame_size; + min_period_size = roundup_pow_of_two(min_period_size); + if (oss_period_size < min_period_size) + oss_period_size = min_period_size; + } max_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - 
max_period_size *= oss_frame_size; - max_period_size = rounddown_pow_of_two(max_period_size); - if (oss_period_size > max_period_size) - oss_period_size = max_period_size; + if (max_period_size) { + max_period_size *= oss_frame_size; + max_period_size = rounddown_pow_of_two(max_period_size); + if (oss_period_size > max_period_size) + oss_period_size = max_period_size; + } oss_periods = oss_buffer_size / oss_period_size; @@ -1935,11 +1941,15 @@ static int snd_pcm_oss_set_subdivide(struct snd_pcm_oss_file *pcm_oss_file, int static int snd_pcm_oss_set_fragment1(struct snd_pcm_substream *substream, unsigned int val) { struct snd_pcm_runtime *runtime; + int fragshift; runtime = substream->runtime; if (runtime->oss.subdivision || runtime->oss.fragshift) return -EINVAL; - runtime->oss.fragshift = val & 0xffff; + fragshift = val & 0xffff; + if (fragshift >= 31) + return -EINVAL; + runtime->oss.fragshift = fragshift; runtime->oss.maxfrags = (val >> 16) & 0xffff; if (runtime->oss.fragshift < 4) /* < 16 */ runtime->oss.fragshift = 4; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 47b155a49226f4..9f3f8e953ff04e 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -755,8 +755,13 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, runtime->boundary *= 2; /* clear the buffer for avoiding possible kernel info leaks */ - if (runtime->dma_area && !substream->ops->copy_user) - memset(runtime->dma_area, 0, runtime->dma_bytes); + if (runtime->dma_area && !substream->ops->copy_user) { + size_t size = runtime->dma_bytes; + + if (runtime->info & SNDRV_PCM_INFO_MMAP) + size = PAGE_ALIGN(size); + memset(runtime->dma_area, 0, size); + } snd_pcm_timer_resolution_change(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP); diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index c78720a3299c42..257ad5206240fd 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -95,11 +95,21 @@ static inline unsigned short 
snd_rawmidi_file_flags(struct file *file) } } -static inline int snd_rawmidi_ready(struct snd_rawmidi_substream *substream) +static inline bool __snd_rawmidi_ready(struct snd_rawmidi_runtime *runtime) +{ + return runtime->avail >= runtime->avail_min; +} + +static bool snd_rawmidi_ready(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime = substream->runtime; + unsigned long flags; + bool ready; - return runtime->avail >= runtime->avail_min; + spin_lock_irqsave(&runtime->lock, flags); + ready = __snd_rawmidi_ready(runtime); + spin_unlock_irqrestore(&runtime->lock, flags); + return ready; } static inline int snd_rawmidi_ready_append(struct snd_rawmidi_substream *substream, @@ -1019,7 +1029,7 @@ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream, if (result > 0) { if (runtime->event) schedule_work(&runtime->event_work); - else if (snd_rawmidi_ready(substream)) + else if (__snd_rawmidi_ready(runtime)) wake_up(&runtime->sleep); } spin_unlock_irqrestore(&runtime->lock, flags); @@ -1098,7 +1108,7 @@ static ssize_t snd_rawmidi_read(struct file *file, char __user *buf, size_t coun result = 0; while (count > 0) { spin_lock_irq(&runtime->lock); - while (!snd_rawmidi_ready(substream)) { + while (!__snd_rawmidi_ready(runtime)) { wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { @@ -1115,9 +1125,11 @@ static ssize_t snd_rawmidi_read(struct file *file, char __user *buf, size_t coun return -ENODEV; if (signal_pending(current)) return result > 0 ? result : -ERESTARTSYS; - if (!runtime->avail) - return result > 0 ? result : -EIO; spin_lock_irq(&runtime->lock); + if (!runtime->avail) { + spin_unlock_irq(&runtime->lock); + return result > 0 ? 
result : -EIO; + } } spin_unlock_irq(&runtime->lock); count1 = snd_rawmidi_kernel_read1(substream, @@ -1255,7 +1267,7 @@ int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int coun runtime->avail += count; substream->bytes += count; if (count > 0) { - if (runtime->drain || snd_rawmidi_ready(substream)) + if (runtime->drain || __snd_rawmidi_ready(runtime)) wake_up(&runtime->sleep); } return count; @@ -1444,9 +1456,11 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf, return -ENODEV; if (signal_pending(current)) return result > 0 ? result : -ERESTARTSYS; - if (!runtime->avail && !timeout) - return result > 0 ? result : -EIO; spin_lock_irq(&runtime->lock); + if (!runtime->avail && !timeout) { + spin_unlock_irq(&runtime->lock); + return result > 0 ? result : -EIO; + } } spin_unlock_irq(&runtime->lock); count1 = snd_rawmidi_kernel_write1(substream, buf, NULL, count); @@ -1526,6 +1540,7 @@ static void snd_rawmidi_proc_info_read(struct snd_info_entry *entry, struct snd_rawmidi *rmidi; struct snd_rawmidi_substream *substream; struct snd_rawmidi_runtime *runtime; + unsigned long buffer_size, avail, xruns; rmidi = entry->private_data; snd_iprintf(buffer, "%s\n\n", rmidi->name); @@ -1544,13 +1559,16 @@ static void snd_rawmidi_proc_info_read(struct snd_info_entry *entry, " Owner PID : %d\n", pid_vnr(substream->pid)); runtime = substream->runtime; + spin_lock_irq(&runtime->lock); + buffer_size = runtime->buffer_size; + avail = runtime->avail; + spin_unlock_irq(&runtime->lock); snd_iprintf(buffer, " Mode : %s\n" " Buffer size : %lu\n" " Avail : %lu\n", runtime->oss ? 
"OSS compatible" : "native", - (unsigned long) runtime->buffer_size, - (unsigned long) runtime->avail); + buffer_size, avail); } } } @@ -1568,13 +1586,16 @@ static void snd_rawmidi_proc_info_read(struct snd_info_entry *entry, " Owner PID : %d\n", pid_vnr(substream->pid)); runtime = substream->runtime; + spin_lock_irq(&runtime->lock); + buffer_size = runtime->buffer_size; + avail = runtime->avail; + xruns = runtime->xruns; + spin_unlock_irq(&runtime->lock); snd_iprintf(buffer, " Buffer size : %lu\n" " Avail : %lu\n" " Overruns : %lu\n", - (unsigned long) runtime->buffer_size, - (unsigned long) runtime->avail, - (unsigned long) runtime->xruns); + buffer_size, avail, xruns); } } } diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index 11554d0412f06f..1b8409ec2c97f6 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -611,7 +611,8 @@ snd_seq_oss_synth_make_info(struct seq_oss_devinfo *dp, int dev, struct synth_in if (info->is_midi) { struct midi_info minf; - snd_seq_oss_midi_make_info(dp, info->midi_mapped, &minf); + if (snd_seq_oss_midi_make_info(dp, info->midi_mapped, &minf)) + return -ENXIO; inf->synth_type = SYNTH_TYPE_MIDI; inf->synth_subtype = 0; inf->nr_voices = 16; diff --git a/sound/core/seq/seq_queue.h b/sound/core/seq/seq_queue.h index 9254c8dbe5e379..25d2d6b6100791 100644 --- a/sound/core/seq/seq_queue.h +++ b/sound/core/seq/seq_queue.h @@ -26,10 +26,10 @@ struct snd_seq_queue { struct snd_seq_timer *timer; /* time keeper for this queue */ int owner; /* client that 'owns' the timer */ - unsigned int locked:1, /* timer is only accesibble by owner if set */ - klocked:1, /* kernel lock (after START) */ - check_again:1, - check_blocked:1; + bool locked; /* timer is only accesibble by owner if set */ + bool klocked; /* kernel lock (after START) */ + bool check_again; /* concurrent access happened during check */ + bool check_blocked; /* queue being checked */ unsigned int flags; /* 
status flags */ unsigned int info_flags; /* info for sync */ diff --git a/sound/firewire/fireface/ff-transaction.c b/sound/firewire/fireface/ff-transaction.c index 7f82762ccc8c80..ee7122c461d46f 100644 --- a/sound/firewire/fireface/ff-transaction.c +++ b/sound/firewire/fireface/ff-transaction.c @@ -88,7 +88,7 @@ static void transmit_midi_msg(struct snd_ff *ff, unsigned int port) /* Set interval to next transaction. */ ff->next_ktime[port] = ktime_add_ns(ktime_get(), - ff->rx_bytes[port] * 8 * NSEC_PER_SEC / 31250); + ff->rx_bytes[port] * 8 * (NSEC_PER_SEC / 31250)); if (quad_count == 1) tcode = TCODE_WRITE_QUADLET_REQUEST; diff --git a/sound/firewire/tascam/tascam-transaction.c b/sound/firewire/tascam/tascam-transaction.c index 90288b4b463795..a073cece4a7d5e 100644 --- a/sound/firewire/tascam/tascam-transaction.c +++ b/sound/firewire/tascam/tascam-transaction.c @@ -209,7 +209,7 @@ static void midi_port_work(struct work_struct *work) /* Set interval to next transaction. */ port->next_ktime = ktime_add_ns(ktime_get(), - port->consume_bytes * 8 * NSEC_PER_SEC / 31250); + port->consume_bytes * 8 * (NSEC_PER_SEC / 31250)); /* Start this transaction. 
*/ port->idling = false; diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 4bb58e8b08a858..eec1775dfffe9f 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1803,7 +1803,7 @@ int snd_hda_codec_reset(struct hda_codec *codec) return -EBUSY; /* OK, let it free */ - snd_hdac_device_unregister(&codec->core); + device_release_driver(hda_codec_dev(codec)); /* allow device access again */ snd_hda_unlock_devices(bus); @@ -2934,7 +2934,7 @@ static void hda_call_codec_resume(struct hda_codec *codec) snd_hdac_leave_pm(&codec->core); } -static int hda_codec_suspend(struct device *dev) +static int hda_codec_runtime_suspend(struct device *dev) { struct hda_codec *codec = dev_to_hda_codec(dev); unsigned int state; @@ -2953,7 +2953,7 @@ static int hda_codec_suspend(struct device *dev) return 0; } -static int hda_codec_resume(struct device *dev) +static int hda_codec_runtime_resume(struct device *dev) { struct hda_codec *codec = dev_to_hda_codec(dev); @@ -2968,16 +2968,6 @@ static int hda_codec_resume(struct device *dev) return 0; } -static int hda_codec_runtime_suspend(struct device *dev) -{ - return hda_codec_suspend(dev); -} - -static int hda_codec_runtime_resume(struct device *dev) -{ - return hda_codec_resume(dev); -} - #endif /* CONFIG_PM */ #ifdef CONFIG_PM_SLEEP @@ -2998,31 +2988,31 @@ static void hda_codec_pm_complete(struct device *dev) static int hda_codec_pm_suspend(struct device *dev) { dev->power.power_state = PMSG_SUSPEND; - return hda_codec_suspend(dev); + return pm_runtime_force_suspend(dev); } static int hda_codec_pm_resume(struct device *dev) { dev->power.power_state = PMSG_RESUME; - return hda_codec_resume(dev); + return pm_runtime_force_resume(dev); } static int hda_codec_pm_freeze(struct device *dev) { dev->power.power_state = PMSG_FREEZE; - return hda_codec_suspend(dev); + return pm_runtime_force_suspend(dev); } static int hda_codec_pm_thaw(struct device *dev) { dev->power.power_state = PMSG_THAW; - return 
hda_codec_resume(dev); + return pm_runtime_force_resume(dev); } static int hda_codec_pm_restore(struct device *dev) { dev->power.power_state = PMSG_RESTORE; - return hda_codec_resume(dev); + return pm_runtime_force_resume(dev); } #endif /* CONFIG_PM_SLEEP */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 6852668f1bcb47..d393401db1ec5c 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2220,8 +2220,6 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x1849, 0x7662, "Asrock H81M-HDS", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0), - /* https://bugzilla.redhat.com/show_bug.cgi?id=1581607 */ - SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1558, 0x6504, "Clevo W65_67SB", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ @@ -2486,6 +2484,9 @@ static const struct pci_device_id azx_ids[] = { /* CometLake-S */ { PCI_DEVICE(0x8086, 0xa3f0), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* CometLake-R */ + { PCI_DEVICE(0x8086, 0xf0c8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Icelake */ { PCI_DEVICE(0x8086, 0x34c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, @@ -2509,6 +2510,9 @@ static const struct pci_device_id azx_ids[] = { /* Alderlake-S */ { PCI_DEVICE(0x8086, 0x7ad0), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* Alderlake-P */ + { PCI_DEVICE(0x8086, 0x51c8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Elkhart Lake */ { PCI_DEVICE(0x8086, 0x4b55), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c index eb8ec109d7adb5..d5ffcba794e50f 100644 --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -139,7 +139,7 @@ static int reconfig_codec(struct hda_codec 
*codec) "The codec is being used, can't reconfigure.\n"); goto error; } - err = snd_hda_codec_configure(codec); + err = device_reprobe(hda_codec_dev(codec)); if (err < 0) goto error; err = snd_card_register(codec->card); diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c index 70164d1428d404..361cf2041911ad 100644 --- a/sound/pci/hda/hda_tegra.c +++ b/sound/pci/hda/hda_tegra.c @@ -388,7 +388,7 @@ static int hda_tegra_first_init(struct azx *chip, struct platform_device *pdev) * in powers of 2, next available ratio is 16 which can be * used as a limiting factor here. */ - if (of_device_is_compatible(np, "nvidia,tegra194-hda")) + if (of_device_is_compatible(np, "nvidia,tegra30-hda")) chip->bus.core.sdo_limit = 16; /* codec detection */ diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index d8370a417e3d44..ee500e46dd4f67 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -95,7 +95,7 @@ enum { }; /* Strings for Input Source Enum Control */ -static const char *const in_src_str[3] = {"Rear Mic", "Line", "Front Mic" }; +static const char *const in_src_str[3] = { "Microphone", "Line In", "Front Microphone" }; #define IN_SRC_NUM_OF_INPUTS 3 enum { REAR_MIC, @@ -1223,7 +1223,7 @@ static const struct hda_pintbl ae5_pincfgs[] = { { 0x0e, 0x01c510f0 }, /* SPDIF In */ { 0x0f, 0x01017114 }, /* Port A -- Rear L/R. 
*/ { 0x10, 0x01017012 }, /* Port D -- Center/LFE or FP Hp */ - { 0x11, 0x01a170ff }, /* Port B -- LineMicIn2 / Rear Headphone */ + { 0x11, 0x012170ff }, /* Port B -- LineMicIn2 / Rear Headphone */ { 0x12, 0x01a170f0 }, /* Port C -- LineIn1 */ { 0x13, 0x908700f0 }, /* What U Hear In*/ { 0x18, 0x50d000f0 }, /* N/A */ diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index be5000dd158532..d49cc4409d59c0 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -1070,6 +1070,7 @@ static int patch_conexant_auto(struct hda_codec *codec) static const struct hda_device_id snd_hda_id_conexant[] = { HDA_CODEC_ENTRY(0x14f11f86, "CX8070", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto), + HDA_CODEC_ENTRY(0x14f120d0, "CX11970", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15051, "CX20561 (Hermosa)", patch_conexant_auto), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index b0068f8ca46ddf..dc1ab4fc93a5bb 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -78,6 +78,7 @@ struct hdmi_spec_per_pin { int pcm_idx; /* which pcm is attached. -1 means no pcm is attached */ int repoll_count; bool setup; /* the stream has been set up by prepare callback */ + bool silent_stream; int channels; /* current number of channels */ bool non_pcm; bool chmap_set; /* channel-map override by ALSA API? 
*/ @@ -979,6 +980,13 @@ static int hdmi_choose_cvt(struct hda_codec *codec, else per_pin = get_pin(spec, pin_idx); + if (per_pin && per_pin->silent_stream) { + cvt_idx = cvt_nid_to_cvt_index(codec, per_pin->cvt_nid); + if (cvt_id) + *cvt_id = cvt_idx; + return 0; + } + /* Dynamically assign converter to stream */ for (cvt_idx = 0; cvt_idx < spec->num_cvts; cvt_idx++) { per_cvt = get_cvt(spec, cvt_idx); @@ -1642,27 +1650,92 @@ static void hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, snd_hda_power_down_pm(codec); } +#define I915_SILENT_RATE 48000 +#define I915_SILENT_CHANNELS 2 +#define I915_SILENT_FORMAT SNDRV_PCM_FORMAT_S16_LE +#define I915_SILENT_FORMAT_BITS 16 +#define I915_SILENT_FMT_MASK 0xf + static void silent_stream_enable(struct hda_codec *codec, - struct hdmi_spec_per_pin *per_pin) + struct hdmi_spec_per_pin *per_pin) { - unsigned int newval, oldval; - - codec_dbg(codec, "hdmi: enabling silent stream for NID %d\n", - per_pin->pin_nid); + struct hdmi_spec *spec = codec->spec; + struct hdmi_spec_per_cvt *per_cvt; + int cvt_idx, pin_idx, err; + unsigned int format; mutex_lock(&per_pin->lock); - if (!per_pin->channels) - per_pin->channels = 2; + if (per_pin->setup) { + codec_dbg(codec, "hdmi: PCM already open, no silent stream\n"); + goto unlock_out; + } - oldval = snd_hda_codec_read(codec, per_pin->pin_nid, 0, - AC_VERB_GET_CONV, 0); - newval = (oldval & 0xF0) | 0xF; - snd_hda_codec_write(codec, per_pin->pin_nid, 0, - AC_VERB_SET_CHANNEL_STREAMID, newval); + pin_idx = pin_id_to_pin_index(codec, per_pin->pin_nid, per_pin->dev_id); + err = hdmi_choose_cvt(codec, pin_idx, &cvt_idx); + if (err) { + codec_err(codec, "hdmi: no free converter to enable silent mode\n"); + goto unlock_out; + } + + per_cvt = get_cvt(spec, cvt_idx); + per_cvt->assigned = 1; + per_pin->cvt_nid = per_cvt->cvt_nid; + per_pin->silent_stream = true; + codec_dbg(codec, "hdmi: enabling silent stream pin-NID=0x%x cvt-NID=0x%x\n", + per_pin->pin_nid, per_cvt->cvt_nid); + + 
snd_hda_set_dev_select(codec, per_pin->pin_nid, per_pin->dev_id); + snd_hda_codec_write_cache(codec, per_pin->pin_nid, 0, + AC_VERB_SET_CONNECT_SEL, + per_pin->mux_idx); + + /* configure unused pins to choose other converters */ + pin_cvt_fixup(codec, per_pin, 0); + + snd_hdac_sync_audio_rate(&codec->core, per_pin->pin_nid, + per_pin->dev_id, I915_SILENT_RATE); + + /* trigger silent stream generation in hw */ + format = snd_hdac_calc_stream_format(I915_SILENT_RATE, I915_SILENT_CHANNELS, + I915_SILENT_FORMAT, I915_SILENT_FORMAT_BITS, 0); + snd_hda_codec_setup_stream(codec, per_pin->cvt_nid, + I915_SILENT_FMT_MASK, I915_SILENT_FMT_MASK, format); + usleep_range(100, 200); + snd_hda_codec_setup_stream(codec, per_pin->cvt_nid, I915_SILENT_FMT_MASK, 0, format); + + per_pin->channels = I915_SILENT_CHANNELS; hdmi_setup_audio_infoframe(codec, per_pin, per_pin->non_pcm); + unlock_out: + mutex_unlock(&per_pin->lock); +} + +static void silent_stream_disable(struct hda_codec *codec, + struct hdmi_spec_per_pin *per_pin) +{ + struct hdmi_spec *spec = codec->spec; + struct hdmi_spec_per_cvt *per_cvt; + int cvt_idx; + + mutex_lock(&per_pin->lock); + if (!per_pin->silent_stream) + goto unlock_out; + + codec_dbg(codec, "HDMI: disable silent stream on pin-NID=0x%x cvt-NID=0x%x\n", + per_pin->pin_nid, per_pin->cvt_nid); + + cvt_idx = cvt_nid_to_cvt_index(codec, per_pin->cvt_nid); + if (cvt_idx >= 0 && cvt_idx < spec->num_cvts) { + per_cvt = get_cvt(spec, cvt_idx); + per_cvt->assigned = 0; + } + + per_pin->cvt_nid = 0; + per_pin->silent_stream = false; + + unlock_out: mutex_unlock(&per_pin->lock); } @@ -1701,6 +1774,7 @@ static void sync_eld_via_acomp(struct hda_codec *codec, pm_ret); silent_stream_enable(codec, per_pin); } else if (monitor_prev && !monitor_next) { + silent_stream_disable(codec, per_pin); pm_ret = snd_hda_power_down_pm(codec); if (pm_ret < 0) codec_err(codec, @@ -4275,6 +4349,7 @@ HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), 
HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI", patch_i915_tgl_hdmi), +HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862816, "Rocketlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI", patch_i915_icl_hdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8616c562487071..290645516313c6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2516,6 +2516,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1462, 0x1229, "MSI-GP73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1275, "MSI-GL63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950), @@ -3104,6 +3105,7 @@ static void alc_disable_headset_jack_key(struct hda_codec *codec) case 0x10ec0215: case 0x10ec0225: case 0x10ec0285: + case 0x10ec0287: case 0x10ec0295: case 0x10ec0289: case 0x10ec0299: @@ -3130,6 +3132,7 @@ static void alc_enable_headset_jack_key(struct hda_codec *codec) case 0x10ec0215: case 0x10ec0225: case 0x10ec0285: + case 0x10ec0287: case 0x10ec0295: case 0x10ec0289: case 0x10ec0299: @@ -6286,6 +6289,7 @@ enum { ALC221_FIXUP_HP_FRONT_MIC, ALC292_FIXUP_TPT460, ALC298_FIXUP_SPK_VOLUME, + ALC298_FIXUP_LENOVO_SPK_VOLUME, ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, ALC269_FIXUP_ATIV_BOOK_8, ALC221_FIXUP_HP_MIC_NO_PRESENCE, @@ -6366,6 +6370,8 @@ enum { ALC287_FIXUP_HP_GPIO_LED, ALC256_FIXUP_HP_HEADSET_MIC, 
ALC236_FIXUP_DELL_AIO_HEADSET_MIC, + ALC282_FIXUP_ACER_DISABLE_LINEOUT, + ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST, }; static const struct hda_fixup alc269_fixups[] = { @@ -7115,6 +7121,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, }, + [ALC298_FIXUP_LENOVO_SPK_VOLUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc298_fixup_speaker_volume, + }, [ALC295_FIXUP_DISABLE_DAC3] = { .type = HDA_FIXUP_FUNC, .v.func = alc295_fixup_disable_dac3, @@ -7789,6 +7799,22 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC282_FIXUP_ACER_DISABLE_LINEOUT] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x411111f0 }, + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { }, + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, + [ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_limit_int_mic_boost, + .chained = true, + .chain_id = ALC255_FIXUP_ACER_MIC_NO_PRESENCE, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7803,11 +7829,15 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), + SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x1025, 0x1094, "Acer Aspire E5-575T", ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1025, 
0x1099, "Acer Aspire E5-523G", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1025, 0x1166, "Acer Veriton N4640G", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x1025, 0x1167, "Acer Veriton N6640G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK), SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), @@ -7868,6 +7898,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -7941,11 +7972,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x8780, "HP ZBook Fury 17 G7 Mobile Workstation", + ALC285_FIXUP_HP_GPIO_AMP_INIT), + 
SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation", + ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), @@ -7956,6 +7993,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1271, "ASUS X430UN", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), @@ -7968,6 +8006,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x194e, "ASUS UX563FD", ALC294_FIXUP_ASUS_HPE), + SND_PCI_QUIRK(0x1043, 0x1982, "ASUS B1400CEPE", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), @@ -7976,6 +8015,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", 
ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), @@ -8001,6 +8041,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE), SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), @@ -8013,6 +8055,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x152d, 0x1082, "Quanta NL3", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1401, "Clevo L140[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -8105,6 +8148,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", 
ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), @@ -8560,6 +8604,22 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60140}, {0x19, 0x04a11030}, {0x21, 0x04211020}), + SND_HDA_PIN_QUIRK(0x10ec0282, 0x1025, "Acer", ALC282_FIXUP_ACER_DISABLE_LINEOUT, + ALC282_STANDARD_PINS, + {0x12, 0x90a609c0}, + {0x18, 0x03a11830}, + {0x19, 0x04a19831}, + {0x1a, 0x0481303f}, + {0x1b, 0x04211020}, + {0x21, 0x0321101f}), + SND_HDA_PIN_QUIRK(0x10ec0282, 0x1025, "Acer", ALC282_FIXUP_ACER_DISABLE_LINEOUT, + ALC282_STANDARD_PINS, + {0x12, 0x90a60940}, + {0x18, 0x03a11830}, + {0x19, 0x04a19831}, + {0x1a, 0x0481303f}, + {0x1b, 0x04211020}, + {0x21, 0x0321101f}), SND_HDA_PIN_QUIRK(0x10ec0283, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, ALC282_STANDARD_PINS, {0x12, 0x90a60130}, @@ -8573,11 +8633,20 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x19, 0x03a11020}, {0x21, 0x0321101f}), + SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, + {0x14, 0x90170110}, + {0x19, 0x04a11040}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE, {0x12, 0x90a60130}, {0x14, 0x90170110}, {0x19, 0x04a11040}, {0x21, 0x04211020}), + SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC285_FIXUP_THINKPAD_HEADSET_JACK, + {0x14, 0x90170110}, + {0x17, 0x90170111}, + {0x19, 0x03a11030}, + {0x21, 0x03211020}), SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE, {0x12, 0x90a60130}, {0x17, 0x90170110}, diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 7ef8f3105cdb7f..a5c1a2c4eae4e6 100644 --- 
a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -113,6 +113,7 @@ static struct via_spec *via_new_spec(struct hda_codec *codec) spec->codec_type = VT1708S; spec->gen.indep_hp = 1; spec->gen.keep_eapd_on = 1; + spec->gen.dac_min_mute = 1; spec->gen.pcm_playback_hook = via_playback_pcm_hook; spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO; codec->power_save_node = 1; @@ -1002,6 +1003,7 @@ static const struct hda_verb vt1802_init_verbs[] = { enum { VIA_FIXUP_INTMIC_BOOST, VIA_FIXUP_ASUS_G75, + VIA_FIXUP_POWER_SAVE, }; static void via_fixup_intmic_boost(struct hda_codec *codec, @@ -1011,6 +1013,13 @@ static void via_fixup_intmic_boost(struct hda_codec *codec, override_mic_boost(codec, 0x30, 0, 2, 40); } +static void via_fixup_power_save(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + codec->power_save_node = 0; +} + static const struct hda_fixup via_fixups[] = { [VIA_FIXUP_INTMIC_BOOST] = { .type = HDA_FIXUP_FUNC, @@ -1025,11 +1034,16 @@ static const struct hda_fixup via_fixups[] = { { } } }, + [VIA_FIXUP_POWER_SAVE] = { + .type = HDA_FIXUP_FUNC, + .v.func = via_fixup_power_save, + }, }; static const struct snd_pci_quirk vt2002p_fixups[] = { SND_PCI_QUIRK(0x1043, 0x1487, "Asus G75", VIA_FIXUP_ASUS_G75), SND_PCI_QUIRK(0x1043, 0x8532, "Asus X202E", VIA_FIXUP_INTMIC_BOOST), + SND_PCI_QUIRK_VENDOR(0x1558, "Clevo", VIA_FIXUP_POWER_SAVE), {} }; diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c index a7702e64ec512d..849288d01c6b4c 100644 --- a/sound/soc/amd/acp-da7219-max98357a.c +++ b/sound/soc/amd/acp-da7219-max98357a.c @@ -73,8 +73,13 @@ static int cz_da7219_init(struct snd_soc_pcm_runtime *rtd) return ret; } - da7219_dai_wclk = clk_get(component->dev, "da7219-dai-wclk"); - da7219_dai_bclk = clk_get(component->dev, "da7219-dai-bclk"); + da7219_dai_wclk = devm_clk_get(component->dev, "da7219-dai-wclk"); + if (IS_ERR(da7219_dai_wclk)) + return 
PTR_ERR(da7219_dai_wclk); + + da7219_dai_bclk = devm_clk_get(component->dev, "da7219-dai-bclk"); + if (IS_ERR(da7219_dai_bclk)) + return PTR_ERR(da7219_dai_bclk); ret = snd_soc_card_jack_new(card, "Headset Jack", SND_JACK_HEADSET | SND_JACK_LINEOUT | diff --git a/sound/soc/amd/raven/pci-acp3x.c b/sound/soc/amd/raven/pci-acp3x.c index 31b797c8bfe644..77f2d938960679 100644 --- a/sound/soc/amd/raven/pci-acp3x.c +++ b/sound/soc/amd/raven/pci-acp3x.c @@ -118,6 +118,10 @@ static int snd_acp3x_probe(struct pci_dev *pci, int ret, i; u32 addr, val; + /* Raven device detection */ + if (pci->revision != 0x00) + return -ENODEV; + if (pci_enable_device(pci)) { dev_err(&pci->dev, "pci_enable_device failed\n"); return -ENODEV; diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c index b943e59fc30243..917536def5f2af 100644 --- a/sound/soc/amd/renoir/rn-pci-acp3x.c +++ b/sound/soc/amd/renoir/rn-pci-acp3x.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -20,14 +21,13 @@ module_param(acp_power_gating, int, 0644); MODULE_PARM_DESC(acp_power_gating, "Enable acp power gating"); /** - * dmic_acpi_check = -1 - Checks ACPI method to know DMIC hardware status runtime - * = 0 - Skips the DMIC device creation and returns probe failure - * = 1 - Assumes that platform has DMIC support and skips ACPI - * method check + * dmic_acpi_check = -1 - Use ACPI/DMI method to detect the DMIC hardware presence at runtime + * = 0 - Skip the DMIC device creation and return probe failure + * = 1 - Force DMIC support */ static int dmic_acpi_check = ACP_DMIC_AUTO; module_param(dmic_acpi_check, bint, 0644); -MODULE_PARM_DESC(dmic_acpi_check, "checks Dmic hardware runtime"); +MODULE_PARM_DESC(dmic_acpi_check, "Digital microphone presence (-1=auto, 0=none, 1=force)"); struct acp_dev_data { void __iomem *acp_base; @@ -163,6 +163,38 @@ static int rn_acp_deinit(void __iomem *acp_base) return 0; } +static const struct dmi_system_id rn_acp_quirk_table[] = { + 
{ + /* Lenovo IdeaPad S340-14API */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "81NB"), + } + }, + { + /* Lenovo IdeaPad Flex 5 14ARE05 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "81X2"), + } + }, + { + /* Lenovo IdeaPad 5 15ARE05 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "81YQ"), + } + }, + { + /* Lenovo ThinkPad X395 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "20NLCTO1WW"), + } + }, + {} +}; + static int snd_rn_acp_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { @@ -172,10 +204,15 @@ static int snd_rn_acp_probe(struct pci_dev *pci, acpi_handle handle; acpi_integer dmic_status; #endif + const struct dmi_system_id *dmi_id; unsigned int irqflags; int ret, index; u32 addr; + /* Renoir device check */ + if (pci->revision != 0x01) + return -ENODEV; + if (pci_enable_device(pci)) { dev_err(&pci->dev, "pci_enable_device failed\n"); return -ENODEV; @@ -232,6 +269,12 @@ static int snd_rn_acp_probe(struct pci_dev *pci, goto de_init; } #endif + dmi_id = dmi_first_match(rn_acp_quirk_table); + if (dmi_id && !dmi_id->driver_data) { + dev_info(&pci->dev, "ACPI settings override using DMI (ACP mic is not present)"); + ret = -ENODEV; + goto de_init; + } } adata->res = devm_kzalloc(&pci->dev, diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index bd8854bfd2ee41..142373ec411adb 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -148,6 +148,7 @@ config SND_MCHP_SOC_SPDIFTX config SND_MCHP_SOC_SPDIFRX tristate "Microchip ASoC driver for boards using S/PDIF RX" depends on OF && (ARCH_AT91 || COMPILE_TEST) + depends on COMMON_CLK select SND_SOC_GENERIC_DMAENGINE_PCM select REGMAP_MMIO help diff --git a/sound/soc/codecs/cros_ec_codec.c b/sound/soc/codecs/cros_ec_codec.c index 28f039adfa1388..5c3b7e5e55d23a 100644 
--- a/sound/soc/codecs/cros_ec_codec.c +++ b/sound/soc/codecs/cros_ec_codec.c @@ -332,7 +332,7 @@ static int i2s_rx_event(struct snd_soc_dapm_widget *w, snd_soc_dapm_to_component(w->dapm); struct cros_ec_codec_priv *priv = snd_soc_component_get_drvdata(component); - struct ec_param_ec_codec_i2s_rx p; + struct ec_param_ec_codec_i2s_rx p = {}; switch (event) { case SND_SOC_DAPM_PRE_PMU: diff --git a/sound/soc/codecs/cx2072x.c b/sound/soc/codecs/cx2072x.c index 2ad00ed21bec6c..2f10991a8bdb51 100644 --- a/sound/soc/codecs/cx2072x.c +++ b/sound/soc/codecs/cx2072x.c @@ -1579,7 +1579,7 @@ static struct snd_soc_dai_driver soc_codec_cx2072x_dai[] = { .id = CX2072X_DAI_DSP, .probe = cx2072x_dsp_dai_probe, .playback = { - .stream_name = "Playback", + .stream_name = "DSP Playback", .channels_min = 2, .channels_max = 2, .rates = CX2072X_RATES_DSP, @@ -1591,7 +1591,7 @@ static struct snd_soc_dai_driver soc_codec_cx2072x_dai[] = { .name = "cx2072x-aec", .id = 3, .capture = { - .stream_name = "Capture", + .stream_name = "AEC Capture", .channels_min = 2, .channels_max = 2, .rates = CX2072X_RATES_DSP, diff --git a/sound/soc/codecs/max98390.c b/sound/soc/codecs/max98390.c index ff5cc9bbec2917..bb736c44e68a36 100644 --- a/sound/soc/codecs/max98390.c +++ b/sound/soc/codecs/max98390.c @@ -784,6 +784,7 @@ static int max98390_dsm_init(struct snd_soc_component *component) if (fw->size < MAX98390_DSM_PARAM_MIN_SIZE) { dev_err(component->dev, "param fw is invalid.\n"); + ret = -EINVAL; goto err_alloc; } dsm_param = (char *)fw->data; @@ -794,6 +795,7 @@ static int max98390_dsm_init(struct snd_soc_component *component) fw->size < param_size + MAX98390_DSM_PAYLOAD_OFFSET) { dev_err(component->dev, "param fw is invalid.\n"); + ret = -EINVAL; goto err_alloc; } regmap_write(max98390->regmap, MAX98390_R203A_AMP_EN, 0x80); diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 65b59dbfb43c89..a9b1b4180c471a 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ 
-462,6 +462,8 @@ static int rt711_set_amp_gain_put(struct snd_kcontrol *kcontrol, unsigned int read_ll, read_rl; int i; + mutex_lock(&rt711->calibrate_mutex); + /* Can't use update bit function, so read the original value first */ addr_h = mc->reg; addr_l = mc->rreg; @@ -547,6 +549,8 @@ static int rt711_set_amp_gain_put(struct snd_kcontrol *kcontrol, if (dapm->bias_level <= SND_SOC_BIAS_STANDBY) regmap_write(rt711->regmap, RT711_SET_AUDIO_POWER_STATE, AC_PWRST_D3); + + mutex_unlock(&rt711->calibrate_mutex); return 0; } @@ -859,9 +863,11 @@ static int rt711_set_bias_level(struct snd_soc_component *component, break; case SND_SOC_BIAS_STANDBY: + mutex_lock(&rt711->calibrate_mutex); regmap_write(rt711->regmap, RT711_SET_AUDIO_POWER_STATE, AC_PWRST_D3); + mutex_unlock(&rt711->calibrate_mutex); break; default: diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index fc9ea198ac7990..f57884113406b8 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -4645,8 +4645,12 @@ static int wm8994_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); pm_runtime_idle(&pdev->dev); - return devm_snd_soc_register_component(&pdev->dev, &soc_component_dev_wm8994, + ret = devm_snd_soc_register_component(&pdev->dev, &soc_component_dev_wm8994, wm8994_dai, ARRAY_SIZE(wm8994_dai)); + if (ret < 0) + pm_runtime_disable(&pdev->dev); + + return ret; } static int wm8994_remove(struct platform_device *pdev) diff --git a/sound/soc/codecs/wm8997.c b/sound/soc/codecs/wm8997.c index 37e4bb3dbd8a9f..229f2986cd96b2 100644 --- a/sound/soc/codecs/wm8997.c +++ b/sound/soc/codecs/wm8997.c @@ -1177,6 +1177,8 @@ static int wm8997_probe(struct platform_device *pdev) goto err_spk_irqs; } + return ret; + err_spk_irqs: arizona_free_spk_irqs(arizona); diff --git a/sound/soc/codecs/wm8998.c b/sound/soc/codecs/wm8998.c index f6c5cc80c970b6..5413254295b70d 100644 --- a/sound/soc/codecs/wm8998.c +++ b/sound/soc/codecs/wm8998.c @@ -1375,7 +1375,7 @@ static int 
wm8998_probe(struct platform_device *pdev) ret = arizona_init_spk_irqs(arizona); if (ret < 0) - return ret; + goto err_pm_disable; ret = devm_snd_soc_register_component(&pdev->dev, &soc_component_dev_wm8998, @@ -1390,6 +1390,8 @@ static int wm8998_probe(struct platform_device *pdev) err_spk_irqs: arizona_free_spk_irqs(arizona); +err_pm_disable: + pm_runtime_disable(&pdev->dev); return ret; } diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index e61d00486c6537..dec8716aa8ef5e 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1519,7 +1519,7 @@ static int wm_adsp_create_control(struct wm_adsp *dsp, ctl_work = kzalloc(sizeof(*ctl_work), GFP_KERNEL); if (!ctl_work) { ret = -ENOMEM; - goto err_ctl_cache; + goto err_list_del; } ctl_work->dsp = dsp; @@ -1529,7 +1529,8 @@ static int wm_adsp_create_control(struct wm_adsp *dsp, return 0; -err_ctl_cache: +err_list_del: + list_del(&ctl->list); kfree(ctl->cache); err_ctl_subname: kfree(ctl->subname); diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index a5b446d5af19fc..c1bf69a0bcfe18 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -198,7 +198,7 @@ endif ## SND_SOC_INTEL_SST_TOPLEVEL || SND_SOC_SOF_INTEL_TOPLEVEL config SND_SOC_INTEL_KEEMBAY tristate "Keembay Platforms" - depends on ARM64 || COMPILE_TEST + depends on ARCH_KEEMBAY || COMPILE_TEST depends on COMMON_CLK help If you have a Intel Keembay platform then enable this option diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c index c55d1239e705b5..c763bfeb1f38fb 100644 --- a/sound/soc/intel/boards/haswell.c +++ b/sound/soc/intel/boards/haswell.c @@ -189,6 +189,7 @@ static struct platform_driver haswell_audio = { .probe = haswell_audio_probe, .driver = { .name = "haswell-audio", + .pm = &snd_soc_pm_ops, }, }; diff --git a/sound/soc/intel/boards/sof_maxim_common.c b/sound/soc/intel/boards/sof_maxim_common.c index b6e63ea13d64e2..c2a9757181fe16 100644 --- 
a/sound/soc/intel/boards/sof_maxim_common.c +++ b/sound/soc/intel/boards/sof_maxim_common.c @@ -49,11 +49,11 @@ static int max98373_hw_params(struct snd_pcm_substream *substream, for_each_rtd_codec_dais(rtd, j, codec_dai) { if (!strcmp(codec_dai->component->name, MAX_98373_DEV0_NAME)) { /* DEV0 tdm slot configuration */ - snd_soc_dai_set_tdm_slot(codec_dai, 0x03, 3, 8, 24); + snd_soc_dai_set_tdm_slot(codec_dai, 0x03, 3, 8, 32); } if (!strcmp(codec_dai->component->name, MAX_98373_DEV1_NAME)) { /* DEV1 tdm slot configuration */ - snd_soc_dai_set_tdm_slot(codec_dai, 0x0C, 3, 8, 24); + snd_soc_dai_set_tdm_slot(codec_dai, 0x0C, 3, 8, 32); } } return 0; diff --git a/sound/soc/intel/skylake/cnl-sst.c b/sound/soc/intel/skylake/cnl-sst.c index fcd8dff27ae8e8..1275c149acc021 100644 --- a/sound/soc/intel/skylake/cnl-sst.c +++ b/sound/soc/intel/skylake/cnl-sst.c @@ -224,6 +224,7 @@ static int cnl_set_dsp_D0(struct sst_dsp *ctx, unsigned int core_id) "dsp boot timeout, status=%#x error=%#x\n", sst_dsp_shim_read(ctx, CNL_ADSP_FW_STATUS), sst_dsp_shim_read(ctx, CNL_ADSP_ERROR_CODE)); + ret = -ETIMEDOUT; goto err; } } else { diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index 40bee10b0c65a4..d699e61eca3d07 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -3619,15 +3619,16 @@ static void skl_tplg_complete(struct snd_soc_component *component) list_for_each_entry(dobj, &component->dobj_list, list) { struct snd_kcontrol *kcontrol = dobj->control.kcontrol; - struct soc_enum *se = - (struct soc_enum *)kcontrol->private_value; - char **texts = dobj->control.dtexts; + struct soc_enum *se; + char **texts; char chan_text[4]; - if (dobj->type != SND_SOC_DOBJ_ENUM || - dobj->control.kcontrol->put != - skl_tplg_multi_config_set_dmic) + if (dobj->type != SND_SOC_DOBJ_ENUM || !kcontrol || + kcontrol->put != skl_tplg_multi_config_set_dmic) continue; + + se = (struct soc_enum 
*)kcontrol->private_value; + texts = dobj->control.dtexts; sprintf(chan_text, "c%d", mach->mach_params.dmic_num); for (i = 0; i < se->items; i++) { diff --git a/sound/soc/jz4740/jz4740-i2s.c b/sound/soc/jz4740/jz4740-i2s.c index c7bd20104b2047..0793e284d0e78d 100644 --- a/sound/soc/jz4740/jz4740-i2s.c +++ b/sound/soc/jz4740/jz4740-i2s.c @@ -312,10 +312,14 @@ static int jz4740_i2s_set_sysclk(struct snd_soc_dai *dai, int clk_id, switch (clk_id) { case JZ4740_I2S_CLKSRC_EXT: parent = clk_get(NULL, "ext"); + if (IS_ERR(parent)) + return PTR_ERR(parent); clk_set_parent(i2s->clk_i2s, parent); break; case JZ4740_I2S_CLKSRC_PLL: parent = clk_get(NULL, "pll half"); + if (IS_ERR(parent)) + return PTR_ERR(parent); clk_set_parent(i2s->clk_i2s, parent); ret = clk_set_rate(i2s->clk_i2s, freq); break; diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c index 26e7d9a7198f80..20d31b69a5c00b 100644 --- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c @@ -532,6 +532,7 @@ static struct snd_soc_dai_link mt8183_da7219_dai_links[] = { .dpcm_playback = 1, .ignore_suspend = 1, .be_hw_params_fixup = mt8183_i2s_hw_params_fixup, + .ignore = 1, .init = mt8183_da7219_max98357_hdmi_init, SND_SOC_DAILINK_REG(tdm), }, @@ -754,8 +755,10 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) } } - if (hdmi_codec && strcmp(dai_link->name, "TDM") == 0) + if (hdmi_codec && strcmp(dai_link->name, "TDM") == 0) { dai_link->codecs->of_node = hdmi_codec; + dai_link->ignore = 0; + } if (!dai_link->platforms->name) dai_link->platforms->of_node = platform_node; diff --git a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c index 327dfad41e3102..79ba2f2d845223 100644 --- a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c @@ 
-515,6 +515,7 @@ static struct snd_soc_dai_link mt8183_mt6358_ts3a227_dai_links[] = { .ignore_suspend = 1, .be_hw_params_fixup = mt8183_i2s_hw_params_fixup, .ops = &mt8183_mt6358_tdm_ops, + .ignore = 1, .init = mt8183_mt6358_ts3a227_max98357_hdmi_init, SND_SOC_DAILINK_REG(tdm), }, @@ -661,8 +662,10 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) SND_SOC_DAIFMT_CBM_CFM; } - if (hdmi_codec && strcmp(dai_link->name, "TDM") == 0) + if (hdmi_codec && strcmp(dai_link->name, "TDM") == 0) { dai_link->codecs->of_node = hdmi_codec; + dai_link->ignore = 0; + } if (!dai_link->platforms->name) dai_link->platforms->of_node = platform_node; diff --git a/sound/soc/meson/Kconfig b/sound/soc/meson/Kconfig index 363dc3b1bbe470..ce0cbdc69b2ec9 100644 --- a/sound/soc/meson/Kconfig +++ b/sound/soc/meson/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menu "ASoC support for Amlogic platforms" - depends on ARCH_MESON || COMPILE_TEST + depends on ARCH_MESON || (COMPILE_TEST && COMMON_CLK) config SND_MESON_AIU tristate "Amlogic AIU" diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index c8664ab80d45ad..87cac440b36933 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -467,8 +467,20 @@ static int axg_tdm_iface_set_bias_level(struct snd_soc_component *component, return ret; } +static const struct snd_soc_dapm_widget axg_tdm_iface_dapm_widgets[] = { + SND_SOC_DAPM_SIGGEN("Playback Signal"), +}; + +static const struct snd_soc_dapm_route axg_tdm_iface_dapm_routes[] = { + { "Loopback", NULL, "Playback Signal" }, +}; + static const struct snd_soc_component_driver axg_tdm_iface_component_drv = { - .set_bias_level = axg_tdm_iface_set_bias_level, + .dapm_widgets = axg_tdm_iface_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(axg_tdm_iface_dapm_widgets), + .dapm_routes = axg_tdm_iface_dapm_routes, + .num_dapm_routes = ARRAY_SIZE(axg_tdm_iface_dapm_routes), + .set_bias_level = 
axg_tdm_iface_set_bias_level, }; static const struct of_device_id axg_tdm_iface_of_match[] = { diff --git a/sound/soc/meson/axg-tdmin.c b/sound/soc/meson/axg-tdmin.c index 88ed95ae886bb5..b4faf9d5c1aad1 100644 --- a/sound/soc/meson/axg-tdmin.c +++ b/sound/soc/meson/axg-tdmin.c @@ -224,15 +224,6 @@ static const struct axg_tdm_formatter_ops axg_tdmin_ops = { }; static const struct axg_tdm_formatter_driver axg_tdmin_drv = { - .component_drv = &axg_tdmin_component_drv, - .regmap_cfg = &axg_tdmin_regmap_cfg, - .ops = &axg_tdmin_ops, - .quirks = &(const struct axg_tdm_formatter_hw) { - .skew_offset = 2, - }, -}; - -static const struct axg_tdm_formatter_driver g12a_tdmin_drv = { .component_drv = &axg_tdmin_component_drv, .regmap_cfg = &axg_tdmin_regmap_cfg, .ops = &axg_tdmin_ops, @@ -247,10 +238,10 @@ static const struct of_device_id axg_tdmin_of_match[] = { .data = &axg_tdmin_drv, }, { .compatible = "amlogic,g12a-tdmin", - .data = &g12a_tdmin_drv, + .data = &axg_tdmin_drv, }, { .compatible = "amlogic,sm1-tdmin", - .data = &g12a_tdmin_drv, + .data = &axg_tdmin_drv, }, {} }; MODULE_DEVICE_TABLE(of, axg_tdmin_of_match); diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index 2696ffcba880fa..a824f793811bed 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -106,6 +106,7 @@ config SND_SOC_QDSP6 config SND_SOC_MSM8996 tristate "SoC Machine driver for MSM8996 and APQ8096 boards" depends on QCOM_APR + depends on COMMON_CLK select SND_SOC_QDSP6 select SND_SOC_QCOM_COMMON help diff --git a/sound/soc/qcom/common.c b/sound/soc/qcom/common.c index 54660f126d09ea..09af007007007b 100644 --- a/sound/soc/qcom/common.c +++ b/sound/soc/qcom/common.c @@ -58,7 +58,7 @@ int qcom_snd_parse_of(struct snd_soc_card *card) dlc = devm_kzalloc(dev, 2 * sizeof(*dlc), GFP_KERNEL); if (!dlc) { ret = -ENOMEM; - goto err; + goto err_put_np; } link->cpus = &dlc[0]; @@ -70,7 +70,7 @@ int qcom_snd_parse_of(struct snd_soc_card *card) ret = of_property_read_string(np, "link-name", 
&link->name); if (ret) { dev_err(card->dev, "error getting codec dai_link name\n"); - goto err; + goto err_put_np; } cpu = of_get_child_by_name(np, "cpu"); @@ -130,8 +130,10 @@ int qcom_snd_parse_of(struct snd_soc_card *card) } else { /* DPCM frontend */ dlc = devm_kzalloc(dev, sizeof(*dlc), GFP_KERNEL); - if (!dlc) - return -ENOMEM; + if (!dlc) { + ret = -ENOMEM; + goto err; + } link->codecs = dlc; link->num_codecs = 1; @@ -158,10 +160,11 @@ int qcom_snd_parse_of(struct snd_soc_card *card) return 0; err: - of_node_put(np); of_node_put(cpu); of_node_put(codec); of_node_put(platform); +err_put_np: + of_node_put(np); return ret; } EXPORT_SYMBOL(qcom_snd_parse_of); diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 426235a217ec60..46bb24afeacf0b 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -270,18 +270,6 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream, struct lpaif_i2sctl *i2sctl = drvdata->i2sctl; unsigned int id = dai->driver->id; int ret = -EINVAL; - unsigned int val = 0; - - ret = regmap_read(drvdata->lpaif_map, - LPAIF_I2SCTL_REG(drvdata->variant, dai->driver->id), &val); - if (ret) { - dev_err(dai->dev, "error reading from i2sctl reg: %d\n", ret); - return ret; - } - if (val == LPAIF_I2SCTL_RESET_STATE) { - dev_err(dai->dev, "error in i2sctl register state\n"); - return -ENOTRECOVERABLE; - } switch (cmd) { case SNDRV_PCM_TRIGGER_START: @@ -356,8 +344,30 @@ int asoc_qcom_lpass_cpu_dai_probe(struct snd_soc_dai *dai) } EXPORT_SYMBOL_GPL(asoc_qcom_lpass_cpu_dai_probe); +static int asoc_qcom_of_xlate_dai_name(struct snd_soc_component *component, + struct of_phandle_args *args, + const char **dai_name) +{ + struct lpass_data *drvdata = snd_soc_component_get_drvdata(component); + struct lpass_variant *variant = drvdata->variant; + int id = args->args[0]; + int ret = -EINVAL; + int i; + + for (i = 0; i < variant->num_dai; i++) { + if (variant->dai_driver[i].id == id) { + *dai_name = 
variant->dai_driver[i].name; + ret = 0; + break; + } + } + + return ret; +} + static const struct snd_soc_component_driver lpass_cpu_comp_driver = { .name = "lpass-cpu", + .of_xlate_dai_name = asoc_qcom_of_xlate_dai_name, }; static bool lpass_cpu_regmap_writeable(struct device *dev, unsigned int reg) @@ -454,20 +464,16 @@ static bool lpass_cpu_regmap_volatile(struct device *dev, unsigned int reg) struct lpass_variant *v = drvdata->variant; int i; - for (i = 0; i < v->i2s_ports; ++i) - if (reg == LPAIF_I2SCTL_REG(v, i)) - return true; for (i = 0; i < v->irq_ports; ++i) if (reg == LPAIF_IRQSTAT_REG(v, i)) return true; for (i = 0; i < v->rdma_channels; ++i) - if (reg == LPAIF_RDMACURR_REG(v, i) || reg == LPAIF_RDMACTL_REG(v, i)) + if (reg == LPAIF_RDMACURR_REG(v, i)) return true; for (i = 0; i < v->wrdma_channels; ++i) - if (reg == LPAIF_WRDMACURR_REG(v, i + v->wrdma_channel_start) || - reg == LPAIF_WRDMACTL_REG(v, i + v->wrdma_channel_start)) + if (reg == LPAIF_WRDMACURR_REG(v, i + v->wrdma_channel_start)) return true; return false; diff --git a/sound/soc/qcom/lpass-hdmi.c b/sound/soc/qcom/lpass-hdmi.c index 172952d3a5d66f..abfb8737a89f41 100644 --- a/sound/soc/qcom/lpass-hdmi.c +++ b/sound/soc/qcom/lpass-hdmi.c @@ -24,7 +24,7 @@ static int lpass_hdmi_daiops_hw_params(struct snd_pcm_substream *substream, unsigned int rate = params_rate(params); unsigned int channels = params_channels(params); unsigned int ret; - unsigned int bitwidth; + int bitwidth; unsigned int word_length; unsigned int ch_sts_buf0; unsigned int ch_sts_buf1; diff --git a/sound/soc/qcom/lpass-ipq806x.c b/sound/soc/qcom/lpass-ipq806x.c index 832a9161484e7f..3a45e6a26f04b2 100644 --- a/sound/soc/qcom/lpass-ipq806x.c +++ b/sound/soc/qcom/lpass-ipq806x.c @@ -131,7 +131,7 @@ static struct lpass_variant ipq806x_data = { .micmode = REG_FIELD_ID(0x0010, 4, 7, 5, 0x4), .micmono = REG_FIELD_ID(0x0010, 3, 3, 5, 0x4), .wssrc = REG_FIELD_ID(0x0010, 2, 2, 5, 0x4), - .bitwidth = REG_FIELD_ID(0x0010, 0, 0, 5, 0x4), 
+ .bitwidth = REG_FIELD_ID(0x0010, 0, 1, 5, 0x4), .rdma_dyncclk = REG_FIELD_ID(0x6000, 12, 12, 4, 0x1000), .rdma_bursten = REG_FIELD_ID(0x6000, 11, 11, 4, 0x1000), diff --git a/sound/soc/qcom/lpass-lpaif-reg.h b/sound/soc/qcom/lpass-lpaif-reg.h index 405542832e9941..baf72f124ea9b2 100644 --- a/sound/soc/qcom/lpass-lpaif-reg.h +++ b/sound/soc/qcom/lpass-lpaif-reg.h @@ -133,7 +133,7 @@ #define LPAIF_WRDMAPERCNT_REG(v, chan) LPAIF_WRDMA_REG_ADDR(v, 0x14, (chan)) #define LPAIF_INTFDMA_REG(v, chan, reg, dai_id) \ - ((v->dai_driver[dai_id].id == LPASS_DP_RX) ? \ + ((dai_id == LPASS_DP_RX) ? \ LPAIF_HDMI_RDMA##reg##_REG(v, chan) : \ LPAIF_RDMA##reg##_REG(v, chan)) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 80b09dede5f9cb..71122e9eb23053 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -257,6 +257,9 @@ static int lpass_platform_pcmops_hw_params(struct snd_soc_component *component, break; case MI2S_PRIMARY: case MI2S_SECONDARY: + case MI2S_TERTIARY: + case MI2S_QUATERNARY: + case MI2S_QUINARY: ret = regmap_fields_write(dmactl->intf, id, LPAIF_DMACTL_AUDINTF(dma_port)); if (ret) { @@ -452,7 +455,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, unsigned int reg_irqclr = 0, val_irqclr = 0; unsigned int reg_irqen = 0, val_irqen = 0, val_mask = 0; unsigned int dai_id = cpu_dai->driver->id; - unsigned int dma_ctrl_reg = 0; ch = pcm_data->dma_ch; if (dir == SNDRV_PCM_STREAM_PLAYBACK) { @@ -469,17 +471,7 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, id = pcm_data->dma_ch - v->wrdma_channel_start; map = drvdata->lpaif_map; } - ret = regmap_read(map, LPAIF_DMACTL_REG(v, ch, dir, dai_id), &dma_ctrl_reg); - if (ret) { - dev_err(soc_runtime->dev, "error reading from rdmactl reg: %d\n", ret); - return ret; - } - if (dma_ctrl_reg == LPAIF_DMACTL_RESET_STATE || - dma_ctrl_reg == LPAIF_DMACTL_RESET_STATE + 1) { - dev_err(soc_runtime->dev, "error 
in rdmactl register state\n"); - return -ENOTRECOVERABLE; - } switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: @@ -500,7 +492,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, "error writing to rdmactl reg: %d\n", ret); return ret; } - map = drvdata->hdmiif_map; reg_irqclr = LPASS_HDMITX_APP_IRQCLEAR_REG(v); val_irqclr = (LPAIF_IRQ_ALL(ch) | LPAIF_IRQ_HDMI_REQ_ON_PRELOAD(ch) | @@ -519,7 +510,9 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, break; case MI2S_PRIMARY: case MI2S_SECONDARY: - map = drvdata->lpaif_map; + case MI2S_TERTIARY: + case MI2S_QUATERNARY: + case MI2S_QUINARY: reg_irqclr = LPAIF_IRQCLEAR_REG(v, LPAIF_IRQ_PORT_HOST); val_irqclr = LPAIF_IRQ_ALL(ch); @@ -563,7 +556,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, "error writing to rdmactl reg: %d\n", ret); return ret; } - map = drvdata->hdmiif_map; reg_irqen = LPASS_HDMITX_APP_IRQEN_REG(v); val_mask = (LPAIF_IRQ_ALL(ch) | LPAIF_IRQ_HDMI_REQ_ON_PRELOAD(ch) | @@ -573,7 +565,9 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, break; case MI2S_PRIMARY: case MI2S_SECONDARY: - map = drvdata->lpaif_map; + case MI2S_TERTIARY: + case MI2S_QUATERNARY: + case MI2S_QUINARY: reg_irqen = LPAIF_IRQEN_REG(v, LPAIF_IRQ_PORT_HOST); val_mask = LPAIF_IRQ_ALL(ch); val_irqen = 0; @@ -670,6 +664,9 @@ static irqreturn_t lpass_dma_interrupt_handler( break; case MI2S_PRIMARY: case MI2S_SECONDARY: + case MI2S_TERTIARY: + case MI2S_QUATERNARY: + case MI2S_QUINARY: map = drvdata->lpaif_map; reg = LPAIF_IRQCLEAR_REG(v, LPAIF_IRQ_PORT_HOST); val = 0; diff --git a/sound/soc/qcom/lpass-sc7180.c b/sound/soc/qcom/lpass-sc7180.c index bc998d50160000..c647e627897a22 100644 --- a/sound/soc/qcom/lpass-sc7180.c +++ b/sound/soc/qcom/lpass-sc7180.c @@ -20,7 +20,7 @@ #include "lpass.h" static struct snd_soc_dai_driver sc7180_lpass_cpu_dai_driver[] = { - [MI2S_PRIMARY] = { + { .id = 
MI2S_PRIMARY, .name = "Primary MI2S", .playback = { @@ -43,9 +43,7 @@ static struct snd_soc_dai_driver sc7180_lpass_cpu_dai_driver[] = { }, .probe = &asoc_qcom_lpass_cpu_dai_probe, .ops = &asoc_qcom_lpass_cpu_dai_ops, - }, - - [MI2S_SECONDARY] = { + }, { .id = MI2S_SECONDARY, .name = "Secondary MI2S", .playback = { @@ -59,8 +57,7 @@ static struct snd_soc_dai_driver sc7180_lpass_cpu_dai_driver[] = { }, .probe = &asoc_qcom_lpass_cpu_dai_probe, .ops = &asoc_qcom_lpass_cpu_dai_ops, - }, - [LPASS_DP_RX] = { + }, { .id = LPASS_DP_RX, .name = "Hdmi", .playback = { diff --git a/sound/soc/qcom/lpass.h b/sound/soc/qcom/lpass.h index bccd1a05d771e9..868c1c8dbd4553 100644 --- a/sound/soc/qcom/lpass.h +++ b/sound/soc/qcom/lpass.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "lpass-hdmi.h" #define LPASS_AHBIX_CLOCK_FREQUENCY 131072000 diff --git a/sound/soc/qcom/qdsp6/q6afe-clocks.c b/sound/soc/qcom/qdsp6/q6afe-clocks.c index 2efc2eaa04243a..acfc0c698f6a19 100644 --- a/sound/soc/qcom/qdsp6/q6afe-clocks.c +++ b/sound/soc/qcom/qdsp6/q6afe-clocks.c @@ -16,6 +16,7 @@ .afe_clk_id = Q6AFE_##id, \ .name = #id, \ .attributes = LPASS_CLK_ATTRIBUTE_COUPLE_NO, \ + .rate = 19200000, \ .hw.init = &(struct clk_init_data) { \ .ops = &clk_q6afe_ops, \ .name = #id, \ diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 7f87b449f950b8..148c095df27b12 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2486,6 +2486,7 @@ void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w) enum snd_soc_dapm_direction dir; list_del(&w->list); + list_del(&w->dirty); /* * remove source and sink paths associated to this widget. 
* While removing the path, remove reference to it from both diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index dcab9527ba3d7e..91bf339581590a 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2231,6 +2231,7 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + case SNDRV_PCM_TRIGGER_DRAIN: ret = dpcm_dai_trigger_fe_be(substream, cmd, true); break; case SNDRV_PCM_TRIGGER_STOP: @@ -2248,6 +2249,7 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + case SNDRV_PCM_TRIGGER_DRAIN: ret = dpcm_dai_trigger_fe_be(substream, cmd, false); break; case SNDRV_PCM_TRIGGER_STOP: diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index c5ef432a023bae..1030e11017b274 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -506,7 +506,7 @@ static void remove_dai(struct snd_soc_component *comp, { struct snd_soc_dai_driver *dai_drv = container_of(dobj, struct snd_soc_dai_driver, dobj); - struct snd_soc_dai *dai; + struct snd_soc_dai *dai, *_dai; if (pass != SOC_TPLG_PASS_PCM_DAI) return; @@ -514,9 +514,9 @@ static void remove_dai(struct snd_soc_component *comp, if (dobj->ops && dobj->ops->dai_unload) dobj->ops->dai_unload(comp, dobj); - for_each_component_dais(comp, dai) + for_each_component_dais_safe(comp, dai, _dai) if (dai->driver == dai_drv) - dai->driver = NULL; + snd_soc_unregister_dai(dai); kfree(dai_drv->playback.stream_name); kfree(dai_drv->capture.stream_name); @@ -987,7 +987,7 @@ static int soc_tplg_denum_create_values(struct soc_enum *se, return -EINVAL; se->dobj.control.dvalues = kzalloc(le32_to_cpu(ec->items) * - sizeof(u32), + sizeof(*se->dobj.control.dvalues), GFP_KERNEL); if (!se->dobj.control.dvalues) return -ENOMEM; @@ -1876,7 +1876,7 @@ static int 
soc_tplg_dai_create(struct soc_tplg *tplg, list_add(&dai_drv->dobj.list, &tplg->comp->dobj_list); /* register the DAI to the component */ - dai = devm_snd_soc_register_dai(tplg->comp->dev, tplg->comp, dai_drv, false); + dai = snd_soc_register_dai(tplg->comp, dai_drv, false); if (!dai) return -ENOMEM; @@ -1884,6 +1884,7 @@ static int soc_tplg_dai_create(struct soc_tplg *tplg, ret = snd_soc_dapm_new_dai_widgets(dapm, dai); if (ret != 0) { dev_err(dai->dev, "Failed to create DAI widgets %d\n", ret); + snd_soc_unregister_dai(dai); return ret; } diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index a066e08860cbfc..de7ff2d097ab9d 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -271,6 +271,7 @@ config SND_SOC_SOF_JASPERLAKE config SND_SOC_SOF_HDA_COMMON tristate + select SND_INTEL_DSP_CONFIG select SND_SOC_SOF_INTEL_COMMON select SND_SOC_SOF_HDA_LINK_BASELINE help @@ -330,14 +331,13 @@ config SND_SOC_SOF_HDA tristate select SND_HDA_EXT_CORE if SND_SOC_SOF_HDA_LINK select SND_SOC_HDAC_HDA if SND_SOC_SOF_HDA_AUDIO_CODEC - select SND_INTEL_DSP_CONFIG help This option is not user-selectable but automagically handled by 'select' statements at a higher level config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK bool "SOF support for SoundWire" - depends on SOUNDWIRE && ACPI + depends on ACPI help This adds support for SoundWire with Sound Open Firmware for Intel(R) platforms. 
@@ -353,6 +353,7 @@ config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE config SND_SOC_SOF_INTEL_SOUNDWIRE tristate + select SOUNDWIRE select SOUNDWIRE_INTEL help This option is not user-selectable but automagically handled by diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index 6875fa570c2c55..8d65004c917a19 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -93,8 +93,7 @@ void hda_codec_jack_check(struct snd_sof_dev *sdev) * has been recorded in STATESTS */ if (codec->jacktbl.used) - schedule_delayed_work(&codec->jackpoll_work, - codec->jackpoll_interval); + pm_request_resume(&codec->core.dev); } #else void hda_codec_jack_wake_enable(struct snd_sof_dev *sdev) {} @@ -156,7 +155,8 @@ static int hda_codec_probe(struct snd_sof_dev *sdev, int address, if (!hdev->bus->audio_component) { dev_dbg(sdev->dev, "iDisp hw present but no driver\n"); - goto error; + ret = -ENOENT; + goto out; } hda_priv->need_display_power = true; } @@ -173,24 +173,23 @@ static int hda_codec_probe(struct snd_sof_dev *sdev, int address, * other return codes without modification */ if (ret == 0) - goto error; + ret = -ENOENT; } - return ret; - -error: - snd_hdac_ext_bus_device_exit(hdev); - return -ENOENT; - +out: + if (ret < 0) { + snd_hdac_device_unregister(hdev); + put_device(&hdev->dev); + } #else hdev = devm_kzalloc(sdev->dev, sizeof(*hdev), GFP_KERNEL); if (!hdev) return -ENOMEM; ret = snd_hdac_ext_bus_device_init(&hbus->core, address, hdev, HDA_DEV_ASOC); +#endif return ret; -#endif } /* Codec initialization */ diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 18ff1c2f5376e3..2dbc1273e56bd6 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -683,8 +683,10 @@ static int hda_resume(struct snd_sof_dev *sdev, bool runtime_resume) #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) /* check jack status */ - if (runtime_resume) - hda_codec_jack_check(sdev); + if (runtime_resume) { + 
if (sdev->system_suspend_target == SOF_SUSPEND_NONE) + hda_codec_jack_check(sdev); + } /* turn off the links that were off before suspend */ list_for_each_entry(hlink, &bus->hlink_list, list) { diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c index f23ff29e7c1d32..a994b5cf87b313 100644 --- a/sound/soc/sunxi/sun4i-i2s.c +++ b/sound/soc/sunxi/sun4i-i2s.c @@ -450,11 +450,11 @@ static int sun8i_i2s_set_chan_cfg(const struct sun4i_i2s *i2s, switch (i2s->format & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_DSP_A: case SND_SOC_DAIFMT_DSP_B: - case SND_SOC_DAIFMT_LEFT_J: - case SND_SOC_DAIFMT_RIGHT_J: lrck_period = params_physical_width(params) * slots; break; + case SND_SOC_DAIFMT_LEFT_J: + case SND_SOC_DAIFMT_RIGHT_J: case SND_SOC_DAIFMT_I2S: lrck_period = params_physical_width(params); break; diff --git a/sound/usb/card.c b/sound/usb/card.c index 4457214a3ae62e..57d6d4ff01e088 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -382,6 +382,9 @@ static const struct usb_audio_device_name usb_audio_names[] = { /* ASUS ROG Strix */ PROFILE_NAME(0x0b05, 0x1917, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), + /* ASUS PRIME TRX40 PRO-S */ + PROFILE_NAME(0x0b05, 0x1918, + "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* Dell WD15 Dock */ PROFILE_NAME(0x0bda, 0x4014, "Dell", "WD15 Dock", "Dell-WD15-Dock"), diff --git a/sound/usb/clock.c b/sound/usb/clock.c index f3ca59005d9143..674e15bf98ed5c 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -531,6 +531,12 @@ static int set_sample_rate_v1(struct snd_usb_audio *chip, int iface, } crate = data[0] | (data[1] << 8) | (data[2] << 16); + if (!crate) { + dev_info(&dev->dev, "failed to read current rate; disabling the check\n"); + chip->sample_rate_read_error = 3; /* three strikes, see above */ + return 0; + } + if (crate != rate) { dev_warn(&dev->dev, "current rate %d is different from the runtime rate %d\n", crate, rate); // runtime->rate = crate; diff --git 
a/sound/usb/format.c b/sound/usb/format.c index 3bfead393aa346..91f0ed4a2e7eb1 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -40,6 +40,8 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, case UAC_VERSION_1: default: { struct uac_format_type_i_discrete_descriptor *fmt = _fmt; + if (format >= 64) + return 0; /* invalid format */ sample_width = fmt->bBitResolution; sample_bytes = fmt->bSubframeSize; format = 1ULL << format; diff --git a/sound/usb/midi.c b/sound/usb/midi.c index c8213652470c4f..0c23fa6d8525da 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1889,6 +1889,8 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi *umidi, ms_ep = find_usb_ms_endpoint_descriptor(hostep); if (!ms_ep) continue; + if (ms_ep->bNumEmbMIDIJack > 0x10) + continue; if (usb_endpoint_dir_out(ep)) { if (endpoints[epidx].out_ep) { if (++epidx >= MIDI_MAX_ENDPOINTS) { @@ -2141,6 +2143,8 @@ static int snd_usbmidi_detect_roland(struct snd_usb_midi *umidi, cs_desc[1] == USB_DT_CS_INTERFACE && cs_desc[2] == 0xf1 && cs_desc[3] == 0x02) { + if (cs_desc[4] > 0x10 || cs_desc[5] > 0x10) + continue; endpoint->in_cables = (1 << cs_desc[4]) - 1; endpoint->out_cables = (1 << cs_desc[5]) - 1; return snd_usbmidi_detect_endpoints(umidi, endpoint, 1); diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index c50be2f75f7027..f82c2ab809c1df 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1799,6 +1799,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x25ce: /* Mytek devices */ case 0x278b: /* Rotel? 
*/ case 0x292b: /* Gustard/Ess based devices */ + case 0x2972: /* FiiO devices */ case 0x2ab6: /* T+A devices */ case 0x3353: /* Khadas devices */ case 0x3842: /* EVGA */ diff --git a/sound/usb/stream.c b/sound/usb/stream.c index ca76ba5b5c0b2a..2f6d39c2ba7c8e 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -193,16 +193,16 @@ static int usb_chmap_ctl_get(struct snd_kcontrol *kcontrol, struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); struct snd_usb_substream *subs = info->private_data; struct snd_pcm_chmap_elem *chmap = NULL; - int i; + int i = 0; - memset(ucontrol->value.integer.value, 0, - sizeof(ucontrol->value.integer.value)); if (subs->cur_audiofmt) chmap = subs->cur_audiofmt->chmap; if (chmap) { for (i = 0; i < chmap->channels; i++) ucontrol->value.integer.value[i] = chmap->map[i]; } + for (; i < subs->channels_max; i++) + ucontrol->value.integer.value[i] = 0; return 0; } diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh index 595e164dc352f0..feb30c2c788159 100755 --- a/tools/bootconfig/scripts/bconf2ftrace.sh +++ b/tools/bootconfig/scripts/bconf2ftrace.sh @@ -152,6 +152,7 @@ setup_instance() { # [instance] set_array_of ${instance}.options ${instancedir}/trace_options set_value_of ${instance}.trace_clock ${instancedir}/trace_clock set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask + set_value_of ${instance}.tracing_on ${instancedir}/tracing_on set_value_of ${instance}.tracer ${instancedir}/current_tracer set_array_of ${instance}.ftrace.filters \ ${instancedir}/set_ftrace_filter diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh index 6c0d4b61e0c260..a0c3bcc6da4f36 100755 --- a/tools/bootconfig/scripts/ftrace2bconf.sh +++ b/tools/bootconfig/scripts/ftrace2bconf.sh @@ -221,6 +221,10 @@ instance_options() { # [instance-name] if [ `echo $val | sed -e s/f//g`x != x ]; then emit_kv $PREFIX.cpumask = $val fi + val=`cat $INSTANCE/tracing_on` + 
if [ `echo $val | sed -e s/f//g`x != x ]; then + emit_kv $PREFIX.tracing_on = $val + fi val= for i in `cat $INSTANCE/set_event`; do diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 3fae61ef63396d..ff3aa0cf399787 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 66cb92136de4ad..bf656432ad736f 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -18,15 +18,6 @@ else endif # always use the host compiler -ifneq ($(LLVM),) -HOSTAR ?= llvm-ar -HOSTCC ?= clang -HOSTLD ?= ld.lld -else -HOSTAR ?= ar -HOSTCC ?= gcc -HOSTLD ?= ld -endif AR = $(HOSTAR) CC = $(HOSTCC) LD = $(HOSTLD) diff --git a/tools/build/Makefile b/tools/build/Makefile index 722f1700d96a8b..bae48e6fa9952a 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -15,10 +15,6 @@ endef $(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,LD,$(CROSS_COMPILE)ld) -HOSTCC ?= gcc -HOSTLD ?= ld -HOSTAR ?= ar - export HOSTCC HOSTLD HOSTAR ifeq ($(V),1) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index cdde783f3018b9..89ba522e377dc7 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -90,7 +90,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap $(OUTPUT)test-hello.bin: $(BUILD) diff --git 
a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 90c3155f05b1e1..84ae1039b0a877 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -107,8 +107,8 @@ int monitor_device(const char *device_name, ret = -EIO; break; } - fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ", - event.timestamp_ns, event.offset, event.line_seqno, + fprintf(stdout, "GPIO EVENT at %" PRIu64 " on line %d (%d|%d) ", + (uint64_t)event.timestamp_ns, event.offset, event.line_seqno, event.seqno); switch (event.id) { case GPIO_V2_LINE_EVENT_RISING_EDGE: diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c index f229ec62301b7e..41e76d24419224 100644 --- a/tools/gpio/gpio-watch.c +++ b/tools/gpio/gpio-watch.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -86,8 +87,8 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - printf("line %u: %s at %llu\n", - chg.info.offset, event, chg.timestamp_ns); + printf("line %u: %s at %" PRIu64 "\n", + chg.info.offset, event, (uint64_t)chg.timestamp_ns); } } diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index 5ed721ad5b1985..af2a44c08683de 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -28,4 +28,9 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + #endif /* _UAPI_LINUX_CONST_H */ diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index e5de6033693812..9f4428be3e3626 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -20,7 +20,6 @@ #define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32 0x10 -#define 
FSCRYPT_POLICY_FLAGS_VALID 0x1F /* Encryption algorithms */ #define FSCRYPT_MODE_AES_256_XTS 1 @@ -28,7 +27,7 @@ #define FSCRYPT_MODE_AES_128_CBC 5 #define FSCRYPT_MODE_AES_128_CTS 6 #define FSCRYPT_MODE_ADIANTUM 9 -#define __FSCRYPT_MODE_MAX 9 +/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */ /* * Legacy policy version; ad-hoc KDF and no key verification. @@ -177,7 +176,7 @@ struct fscrypt_get_key_status_arg { #define FS_POLICY_FLAGS_PAD_32 FSCRYPT_POLICY_FLAGS_PAD_32 #define FS_POLICY_FLAGS_PAD_MASK FSCRYPT_POLICY_FLAGS_PAD_MASK #define FS_POLICY_FLAG_DIRECT_KEY FSCRYPT_POLICY_FLAG_DIRECT_KEY -#define FS_POLICY_FLAGS_VALID FSCRYPT_POLICY_FLAGS_VALID +#define FS_POLICY_FLAGS_VALID 0x07 /* contains old flags only */ #define FS_ENCRYPTION_MODE_INVALID 0 /* never used */ #define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS #define FS_ENCRYPTION_MODE_AES_256_GCM 2 /* never used */ diff --git a/tools/labs/.gitignore b/tools/labs/.gitignore new file mode 100644 index 00000000000000..ce629ee0746336 --- /dev/null +++ b/tools/labs/.gitignore @@ -0,0 +1,12 @@ +/skels/ +vmlinux +zImage +serial.pts +rootfs.img +disk1.img +disk2.img +/*core-image-*.ext4 +/*core-image-*.bz2 +.modinst +/out/ +/rootfs diff --git a/tools/labs/Makefile b/tools/labs/Makefile new file mode 100644 index 00000000000000..251455e64df7a6 --- /dev/null +++ b/tools/labs/Makefile @@ -0,0 +1,57 @@ +KDIR ?= $(shell realpath $(PWD)/../..) 
+ +LABS ?= $(shell cd templates && find -mindepth 1 -maxdepth 1 -type d) +MODS = $(shell cd templates && find $(LABS) -mindepth 1 -name Kbuild | xargs dirname) +TODO ?= 0 + +include qemu/Makefile + +skels: + mkdir -p skels + cd templates && find $(LABS) -type f | xargs ./generate_skels.py --output ../skels --todo $(TODO) + rm -f skels/Kbuild + +skels/Kbuild: + echo "# autogenerated, do not edit " > $@ + echo "ccflags-y += -Wno-unused-function -Wno-unused-label -Wno-unused-variable " >> $@ + for i in $(shell cd skels && find -mindepth 1 -name Kbuild | xargs --no-run-if-empty dirname); do echo "obj-m += $$i/" >> $@; done + +build: $(KCONFIG) skels/Kbuild + $(MAKE) -C $(KDIR) M=$(KDIR)/tools/labs/skels ARCH=$(ARCH) modules + for i in $(shell find skels -name Makefile | xargs --no-run-if-empty dirname); do $(MAKE) -C $$i; done + +copy: $(YOCTO_IMAGE) + @qemu/copy.sh $(YOCTO_IMAGE) + +docker-docs: + cd docker && docker-compose build docs-build + cd docker && docker-compose up -d docs-build + cd docker && docker-compose exec -u ubuntu docs-build bash -c "cd /linux/tools/labs && make docs" + +docker-docs-server: + cd docker && docker-compose build docs-server + cd docker && docker-compose up -d docs-server + +docker-kernel: + cd docker && ARG_UID=$(shell id -u) ARG_GID=$(shell id -g) docker-compose build kernel-build + cd docker && docker-compose up -d kernel-build + cd docker && docker-compose exec -u ubuntu kernel-build bash + +stop-docker-kernel: + cd docker && docker-compose down + +docs: + $(MAKE) -C $(KDIR) DOCBOOKS= SPHINXDIRS="teaching" htmldocs + $(MAKE) -C $(KDIR) BUILDDIR=$(KDIR)/Documentation/output/slides DOCBOOKS= SPHINXDIRS="teaching" slides + for i in $(KDIR)/Documentation/output/slides/teaching/lectures/*.html; do name=$$(basename $$i .html); cp $$i $(KDIR)/Documentation/output/teaching/lectures/$$name-slides.html; done + for i in $(KDIR)/Documentation/output/slides/teaching/so2/lec*.html; do name=$$(basename $$i .html); cp $$i 
$(KDIR)/Documentation/output/teaching/so2/$$name-slides.html; done + cp -r $(KDIR)/Documentation/output/slides/teaching/_static $(KDIR)/Documentation/output/teaching/ + +clean:: + $(MAKE) -C $(KDIR) M=$(KDIR)/tools/labs/skels ARCH=$(ARCH) clean + for i in $(shell find skels -name Makefile | xargs --no-run-if-empty dirname); do $(MAKE) -C $$i clean; done + +clean_skels: + rm -rfi skels + +.PHONY: skels build copy docs docker-docs docker-docs-server docker-kernel stop-docker-kernel clean clean_skels diff --git a/tools/labs/Makefile.vmchecker b/tools/labs/Makefile.vmchecker new file mode 100644 index 00000000000000..9a9ada27392b35 --- /dev/null +++ b/tools/labs/Makefile.vmchecker @@ -0,0 +1,38 @@ +KDIR = /home/so2/vm/linux + +include Makefile + +$(YOCTO_IMAGE): clean-slate.$(YOCTO_IMAGE) + cp clean-slate.$(YOCTO_IMAGE) $(YOCTO_IMAGE) + +setup: $(YOCTO_IMAGE) + mkdir -p out + +copy: + if [ -e qemu.mon ]; then exit 1; fi + mkdir $(TEMPDIR) + sudo mount -t ext4 -o loop $(YOCTO_IMAGE) $(TEMPDIR) + find skels -type f \( -name '*.ko' -or -executable \) | xargs sudo cp -t $(TEMPDIR)/home/root || true + sudo cp -r skels/assignments/*/checker/* -t $(TEMPDIR)/home/root || true + sudo umount $(TEMPDIR) + rmdir $(TEMPDIR) + +extract: + if [ -e qemu.mon ]; then exit 1; fi + mkdir $(TEMPDIR) + sudo mount -t ext4 -o loop $(YOCTO_IMAGE) $(TEMPDIR) + sudo cp $(TEMPDIR)/home/root/run-stdout.vmr out/ && sudo chown so2:so2 out/run-stdout.vmr || true + sudo cp $(TEMPDIR)/home/root/run-stderr.vmr out/ && sudo chown so2:so2 out/run-stderr.vmr || true + sudo umount $(TEMPDIR) + rmdir $(TEMPDIR) + test -f out/run-stdout.vmr || echo "No testing output (likely due to submission errors)." > out/run-stdout.vmr + test -f out/run-stderr.vmr || echo "No testing output (likely due to submission errors)."
> out/run-stderr.vmr + +postprocess: + sed -i '/^Linux version /,/^netconsole: network logging started/d' out/run-km.vmr + +destroy: + -rm -rf out + -rm -f $(YOCTO_IMAGE) + +.PHONY: setup copy extract postprocess destroy diff --git a/tools/labs/docker/docker-compose.yml b/tools/labs/docker/docker-compose.yml new file mode 100644 index 00000000000000..5d5cec8d6a6bd3 --- /dev/null +++ b/tools/labs/docker/docker-compose.yml @@ -0,0 +1,36 @@ +version: '3' +services: + docs-build: + tty: true + build: + context: . + dockerfile: ./docs/Dockerfile + volumes: + - ../../../:/linux + environment: + # workaround for binfmt_misc support in containers + - SPHINX_DITAA_CMD=jexec + - SPHINX_DITAA_ARG=/usr/bin/ditaa + docs-server: + build: + context: . + dockerfile: ./docs/Dockerfile.server + volumes: + - ../../../:/linux + ports: + - 8000:8000 + kernel-build: + privileged: true + tty: true + hostname: "so2" + environment: + - DISPLAY + build: + context: . + dockerfile: ./kernel/Dockerfile + args: + ARG_UID: ${ARG_UID} + ARG_GID: ${ARG_GID} + volumes: + - ../../../:/linux + - /tmp/.X11-unix:/tmp/.X11-unix diff --git a/tools/labs/docker/docs/Dockerfile b/tools/labs/docker/docs/Dockerfile new file mode 100644 index 00000000000000..6a8c3472a3a3a3 --- /dev/null +++ b/tools/labs/docker/docs/Dockerfile @@ -0,0 +1,37 @@ +FROM ubuntu:20.04 + +ENV DEBIAN_FRONTEND noninteractive +RUN apt-get update +RUN apt-get install -y apt-utils +RUN apt-get install -y software-properties-common +RUN apt-get install -y sudo +RUN apt-get install -y make +RUN apt-get install -y git +RUN apt-get install -y python3 +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 +RUN apt-get install -y python3-pip +RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 +RUN apt-get install -y ditaa +RUN apt-get install -y graphviz +RUN apt-get install -y imagemagick +RUN apt-get install -y dvipng +RUN apt-get install -y python3-venv +RUN apt-get install -y fonts-noto-cjk +RUN apt-get 
install -y latexmk +RUN apt-get install -y librsvg2-bin +RUN apt-get install -y texlive-xetex +RUN pip install Sphinx==1.6.7 sphinx_rtd_theme==1.3.0 hieroglyph==1.0 Jinja2==2.11.3 markupsafe==2.0.1 +# append new packages here, to minimize docker rebuild time +RUN rm -rf /var/lib/apt/lists/* + +RUN useradd -ms /bin/bash ubuntu && adduser ubuntu sudo && echo -n 'ubuntu:ubuntu' | chpasswd + +# Enable passwordless sudo for users under the "sudo" group +RUN sed -i.bkp -e \ + 's/%sudo\s\+ALL=(ALL\(:ALL\)\?)\s\+ALL/%sudo ALL=NOPASSWD:ALL/g' \ + /etc/sudoers + +USER ubuntu +WORKDIR /home/ubuntu/ + +ENV PATH ${PATH}:/home/ubuntu/.local diff --git a/tools/labs/docker/docs/Dockerfile.server b/tools/labs/docker/docs/Dockerfile.server new file mode 100644 index 00000000000000..96477d6a1d49aa --- /dev/null +++ b/tools/labs/docker/docs/Dockerfile.server @@ -0,0 +1,24 @@ +FROM ubuntu:20.04 + +ENV DEBIAN_FRONTEND noninteractive +RUN apt-get update +RUN apt-get install -y apt-utils +RUN apt-get install -y sudo +RUN apt-get install -y python3 +# append new packages here, to minimize docker rebuild time + +RUN rm -rf /var/lib/apt/lists/* + +RUN useradd -ms /bin/bash ubuntu && adduser ubuntu sudo && echo -n 'ubuntu:ubuntu' | chpasswd + +# Enable passwordless sudo for users under the "sudo" group +RUN sed -i.bkp -e \ + 's/%sudo\s\+ALL=(ALL\(:ALL\)\?)\s\+ALL/%sudo ALL=NOPASSWD:ALL/g' \ + /etc/sudoers + +USER ubuntu +WORKDIR /home/ubuntu/ + +ENV PATH ${PATH}:/home/ubuntu/.local + +CMD cd /linux/Documentation/output/teaching && python3 -m http.server diff --git a/tools/labs/docker/kernel/Dockerfile b/tools/labs/docker/kernel/Dockerfile new file mode 100644 index 00000000000000..60f17ec61737f5 --- /dev/null +++ b/tools/labs/docker/kernel/Dockerfile @@ -0,0 +1,52 @@ +FROM ubuntu:20.04 + +RUN apt-get update +RUN apt-get install -y software-properties-common +RUN apt-get install -y gcc-multilib +RUN apt-get install -y libncurses5-dev +RUN apt-get install -y bc +RUN apt-get install -y 
qemu-system-x86 +RUN apt-get install -y qemu-system-arm +RUN apt-get install -y python3 +RUN apt-get install -y minicom +RUN apt-get install -y git +RUN apt-get install -y wget +RUN apt-get install -y curl +RUN apt-get install -y sudo + +RUN apt-get install -y iproute2 +RUN apt-get install -y netcat-openbsd +RUN apt-get install -y vim +RUN apt-get install -y dnsmasq +RUN apt-get install -y iputils-ping +RUN apt-get install -y bash-completion +RUN apt-get install -y build-essential +RUN apt-get install -y bison +RUN apt-get install -y flex +RUN apt-get install -y gdb +RUN apt-get install -y asciinema + +RUN apt-get install -y libssl-dev +RUN apt-get install -y git ninja-build pkg-config libglib2.0-dev libpixman-1-dev +RUN apt-get install -y lzop + +RUN apt-get install -y samba # for the new make console command + +RUN apt-get -y clean + +RUN rm -rf /var/lib/apt/lists/* + +ARG ARG_UID +ARG ARG_GID + +RUN groupadd -g $ARG_GID ubuntu +RUN useradd -u $ARG_UID -g $ARG_GID -ms /bin/bash ubuntu && adduser ubuntu sudo && echo -n 'ubuntu:ubuntu' | chpasswd + +# Enable passwordless sudo for users under the "sudo" group +RUN sed -i.bkp -e \ + 's/%sudo\s\+ALL=(ALL\(:ALL\)\?)\s\+ALL/%sudo ALL=NOPASSWD:ALL/g' \ + /etc/sudoers + +USER ubuntu +WORKDIR /home/ubuntu/ +RUN echo add-auto-load-safe-path /linux/scripts/gdb/vmlinux-gdb.py > ~/.gdbinit diff --git a/tools/labs/qemu/Makefile b/tools/labs/qemu/Makefile new file mode 100644 index 00000000000000..e9ee4ec1b50086 --- /dev/null +++ b/tools/labs/qemu/Makefile @@ -0,0 +1,145 @@ +QEMU_DISPLAY ?= none +ARCH ?= x86 +ifeq ($(ARCH),x86) +b = b +endif + +ZIMAGE = $(KDIR)/arch/$(ARCH)/boot/$(b)zImage +KCONFIG = $(KDIR)/.config +NTTCP = $(KDIR)/tools/labs/templates/assignments/6-e100/nttcp + +YOCTO_URL = http://downloads.yoctoproject.org/releases/yocto/yocto-2.3/machines/qemu/qemu$(ARCH)/ +YOCTO_IMAGE = core-image-minimal-qemu$(ARCH).ext4 +#YOCTO_IMAGE = core-image-minimal-dev-qemu$(ARCH).ext4 +#YOCTO_IMAGE = 
core-image-sato-dev-qemu$(ARCH).ext4 +#YOCTO_IMAGE = core-image-sato-qemu$(ARCH).ext4 +#YOCTO_IMAGE = core-image-sato-sdk-qemu$(ARCH).ext4 + +QEMU_OPTS = -kernel $(ZIMAGE) \ + -device virtio-serial \ + -chardev pty,id=virtiocon0 -device virtconsole,chardev=virtiocon0 \ + -serial pipe:pipe1 -serial pipe:pipe2 \ + -netdev tap,id=lkt-tap0,ifname=lkt-tap0,script=no,downscript=no -net nic,netdev=lkt-tap0,model=virtio \ + -netdev tap,id=lkt-tap1,ifname=lkt-tap1,script=no,downscript=no -net nic,netdev=lkt-tap1,model=i82559er \ + -drive file=$(YOCTO_IMAGE),if=virtio,format=raw \ + -drive file=disk1.img,if=virtio,format=raw \ + -drive file=disk2.img,if=virtio,format=raw \ + --append "root=/dev/vda loglevel=15 console=hvc0 pci=noacpi" \ + --display $(QEMU_DISPLAY) -s -m 256 + +ifdef ENABLE_KVM +QEMU_OPTS += -enable-kvm -cpu host +ENABLE_KVM_SUDO = sudo +endif + +boot: .modinst lkt-tap0 lkt-tap1 pipe1.in pipe1.out pipe2.in pipe2.out disk1.img disk2.img nttcp-run + $(ENABLE_KVM_SUDO) ARCH=$(ARCH) qemu/qemu.sh $(QEMU_OPTS) + +TEMPDIR := $(shell mktemp -u) + +$(KCONFIG): qemu/kernel_config.x86 + cp $^ $@ + $(MAKE) -C $(KDIR) olddefconfig + $(MAKE) -C $(KDIR) mod2yesconfig + +zImage: $(ZIMAGE) + +$(ZIMAGE): $(KCONFIG) + $(MAKE) -C $(KDIR) + $(MAKE) -C $(KDIR) modules + +.modinst: $(ZIMAGE) $(YOCTO_IMAGE) + mkdir $(TEMPDIR) + sudo mount -t ext4 -o loop $(YOCTO_IMAGE) $(TEMPDIR) + sudo $(MAKE) -C $(KDIR) modules_install INSTALL_MOD_PATH=$(TEMPDIR) + sudo umount $(TEMPDIR) + rmdir $(TEMPDIR) + sleep 1 && touch .modinst + +gdb: $(ZIMAGE) + gdb -ex "target remote localhost:1234" $(KDIR)/vmlinux + +$(YOCTO_IMAGE): + wget $(YOCTO_URL)/$(YOCTO_IMAGE) + sudo ARCH=$(ARCH) qemu/prepare-image.sh $(YOCTO_IMAGE) + +lkt-tap0: + qemu/create_net.sh $@ + +lkt-tap1: + qemu/create_net.sh $@ + +lkt-tap-smbd: + qemu/create_net.sh $@ + + +nttcp-run: $(NTTCP) lkt-tap1 + $(NTTCP) -v -i & + +pipe1.in: + mkfifo $@ + +pipe1.out: + mkfifo $@ + +pipe2.in: pipe1.out + ln $< $@ + +pipe2.out: pipe1.in + ln $< $@ 
+ +disk0.img: + qemu-img create -f raw $@ 100M + +disk1.img: + qemu-img create -f raw $@ 100M + +disk2.img: + qemu-img create -f raw $@ 100M + +clean:: + -rm -f .modinst + -rm -f disk1.img disk2.img + -rm -f pipe1.in pipe1.out pipe2.in pipe2.out + +# Run with one of the following: +# make -j$(nproc) console +# make -j$(nproc) gui + +# Attach debugger with +# make gdb + +# Stop with +# sync # make sure all filesystem changes are propagated +# CTRL-A X # kill qemu, faster and more reliable than poweroff etc. + +# Compile in skel directories with something like: +# alias kmake='make -C "$HOME/src/linux/" M="$(pwd)"' +# kmake + +YOCTO_ROOTFS = core-image-minimal-qemu$(ARCH).tar.bz2 + +console: $(ZIMAGE) rootfs lkt-tap0 lkt-tap1 lkt-tap-smbd disk0.img disk1.img disk2.img pipe1.in pipe1.out pipe2.in pipe2.out + MODE=console qemu/run-qemu.sh + +checker: $(ZIMAGE) rootfs lkt-tap0 lkt-tap1 lkt-tap-smbd disk0.img disk1.img disk2.img pipe1.in pipe1.out pipe2.in pipe2.out + MODE=checker qemu/run-qemu.sh + +gui: $(ZIMAGE) rootfs lkt-tap0 lkt-tap1 lkt-tap-smbd disk0.img disk1.img disk2.img pipe1.in pipe1.out pipe2.in pipe2.out + MODE=gui qemu/run-qemu.sh + +rootfs: $(YOCTO_ROOTFS) + mkdir -p rootfs + tar -xf $(YOCTO_ROOTFS) -C rootfs + sed -i 's@/sbin/getty@& -n -l "/sbin/rootlogin"@' rootfs/bin/start_getty + printf '%s\n' '#!/bin/sh' '/bin/login -f root' > rootfs/sbin/rootlogin + chmod +x rootfs/sbin/rootlogin + mkdir -p rootfs/home/root/skels + echo "//10.0.2.1/skels /home/root/skels cifs port=4450,guest,user=dummy 0 0" >> rootfs/etc/fstab + echo "hvc0:12345:respawn:/sbin/getty 115200 hvc0" >> rootfs/etc/inittab + +$(YOCTO_ROOTFS): + wget $(YOCTO_URL)/$(YOCTO_ROOTFS) + + +.PHONY: console checker gui boot gdb zImage clean lkt-tap0 lkt-tap1 lkt-tap-smbd diff --git a/tools/labs/qemu/cleanup-net.sh b/tools/labs/qemu/cleanup-net.sh new file mode 100755 index 00000000000000..536702fff31c90 --- /dev/null +++ b/tools/labs/qemu/cleanup-net.sh @@ -0,0 +1,17 @@ +#!/bin/bash +
+DNSMASQ=/tmp/dnsmasq + +for device in lkt-tap0 lkt-tap1 lkt-tap-smbd; do + if ! ip a s dev "$device" &> /dev/null; then + continue + fi + if [ -f $DNSMASQ-$device.pid ]; then + sudo kill $(cat $DNSMASQ-$device.pid) + fi + sudo rm -f $DNSMASQ-$device.leases + if command -v firewall-cmd > /dev/null 2>&1; then # skip when firewalld is not installed + sudo firewall-cmd --zone=trusted --remove-interface=$device + fi + sudo ip tuntap del $device mode tap +done diff --git a/tools/labs/qemu/copy.sh b/tools/labs/qemu/copy.sh new file mode 100755 index 00000000000000..1ee76de911efa8 --- /dev/null +++ b/tools/labs/qemu/copy.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +YOCTO_IMAGE=$1 +TEMPDIR=`mktemp -u` + +# $1 - target directory +# $2 - prefix for cp command (e.g. sudo) +do_copy() +{ + find skels -type f \( -name '*.ko' -or -executable \) | xargs --no-run-if-empty $2 cp --parents -t $1 + find skels -type d \( -name checker \) | xargs --no-run-if-empty $2 cp -r --parents -t $1 +} + +if [ -e qemu.mon ]; then + ip=`tail -n1 /tmp/dnsmasq-lkt-tap0.leases | cut -f3 -d ' '` + if [ -z "$ip" ]; then + echo "qemu is running and no IP address found" + exit 1 + fi + mkdir $TEMPDIR + do_copy $TEMPDIR + scp -q -r -O -o StrictHostKeyChecking=no $TEMPDIR/* root@$ip:. + rm -rf $TEMPDIR +else + mkdir $TEMPDIR + sudo mount -t ext4 -o loop $YOCTO_IMAGE $TEMPDIR + do_copy $TEMPDIR/home/root sudo + sudo umount $TEMPDIR + rmdir $TEMPDIR +fi diff --git a/tools/labs/qemu/create_net.sh b/tools/labs/qemu/create_net.sh new file mode 100755 index 00000000000000..adb1a665bd2b05 --- /dev/null +++ b/tools/labs/qemu/create_net.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +if test $# -ne 1; then + echo "Usage: $0 <device>" 1>&2 + echo "<device> must be lkt-tap0, lkt-tap1 or lkt-tap-smbd" 1>&2 + exit 1 +fi + +device=$1 + +USER=$(whoami) + +case "$device" in + "lkt-tap0") + subnet=172.213.0 + ;; + "lkt-tap1") + subnet=172.30.0 + ;; + "lkt-tap-smbd") + subnet=10.0.2 + ;; + *) + echo "Unknown device" 1>&2 + exit 1 + ;; +esac + +# If device doesn't exist add device. +if !
/sbin/ip link show dev "$device" > /dev/null 2>&1; then + sudo ip tuntap add mode tap user "$USER" dev "$device" +fi + +# Reconfigure just to be sure (even if device exists). +sudo /sbin/ip address flush dev "$device" +sudo /sbin/ip link set dev "$device" down +sudo /sbin/ip address add $subnet.1/24 dev "$device" +sudo /sbin/ip link set dev "$device" up + +if command -v firewall-cmd > /dev/null 2>&1; then # skip when firewalld is not installed + sudo firewall-cmd --zone=trusted --change-interface=$device +fi + +sudo dnsmasq --port=0 --no-resolv --no-hosts --bind-interfaces \ + --interface $device -F $subnet.2,$subnet.20 --listen-address $subnet.1 \ + -x /tmp/dnsmasq-$device.pid -l /tmp/dnsmasq-$device.leases || true diff --git a/tools/labs/qemu/kernel_config.x86 b/tools/labs/qemu/kernel_config.x86 new file mode 100644 index 00000000000000..3c91bd5c2d8a2b --- /dev/null +++ b/tools/labs/qemu/kernel_config.x86 @@ -0,0 +1,114 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set +# CONFIG_USELIB is not set +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +# CONFIG_64BIT is not set +CONFIG_SMP=y +# CONFIG_X86_EXTENDED_PLATFORM is not set +# CONFIG_SCHED_OMIT_FRAME_POINTER is not set +# CONFIG_X86_MCE is not set +# CONFIG_MICROCODE is not set +# CONFIG_RELOCATABLE is not set +# CONFIG_SUSPEND is not set +# CONFIG_VIRTUALIZATION is not set +CONFIG_KPROBES=y +# CONFIG_SECCOMP is not set +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_BINFMT_MISC=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_NETFILTER=y +CONFIG_PCI=y +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +# CONFIG_DEVMEM is not set +# CONFIG_HWMON is not set +# CONFIG_HID_A4TECH is not set +#
CONFIG_HID_APPLE is not set +# CONFIG_HID_BELKIN is not set +# CONFIG_HID_CHERRY is not set +# CONFIG_HID_CHICONY is not set +# CONFIG_HID_CYPRESS is not set +# CONFIG_HID_EZKEY is not set +# CONFIG_HID_KENSINGTON is not set +# CONFIG_HID_MICROSOFT is not set +# CONFIG_HID_MONTEREY is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_VIRTIO_PCI=y +# CONFIG_X86_PLATFORM_DEVICES is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY_USER is not set +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_CONFIGFS_FS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_CRYPTO_ECHAINIV=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_GDB_SCRIPTS=y +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_SLAB=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_PROVE_LOCKING=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +# CONFIG_X86_VERBOSE_BOOTUP is not set +# CONFIG_X86_DEBUG_FPU is not set + + +# for perf_events: +CONFIG_PERF_EVENTS=y +# for stack traces: +CONFIG_FRAME_POINTER=y +# kernel symbols: +CONFIG_KALLSYMS=y +# tracepoints: +CONFIG_TRACEPOINTS=y +# kernel function trace: +CONFIG_FTRACE=y +# kernel-level dynamic tracing: +CONFIG_KPROBE_EVENTS=y +# user-level dynamic tracing: +CONFIG_UPROBES=y +CONFIG_UPROBE_EVENTS=y +# kernel lock tracing: +CONFIG_LOCKDEP=y +# kernel lock tracing: +CONFIG_LOCK_STAT=y + +# for qemu default netdev: +CONFIG_E1000=y + +# for CIFS/SMB/samba rootfs and share +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_CIFS=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_CIFS_ROOT=y + +# enable KDB +CONFIG_KGDB=y +CONFIG_KGDB_KDB=y diff --git a/tools/labs/qemu/prepare-image.sh b/tools/labs/qemu/prepare-image.sh new file mode 100755 index 00000000000000..834469d189e1fb --- /dev/null +++ b/tools/labs/qemu/prepare-image.sh @@ -0,0 +1,32 @@ +#!/bin/bash + 
+size=$(stat -c%s $1) +if [ $size -lt 50000000 ]; then + e2fsck -f $1 + resize2fs $1 64M +fi + +TMP=$(mktemp -d) + +mount -t ext4 -o loop $1 $TMP + +# add console +if [ "$ARCH" = "x86" ]; then + echo "hvc0:12345:respawn:/sbin/getty 115200 hvc0" >> $TMP/etc/inittab +else + echo "mxc0:12345:respawn:/sbin/getty 115200 ttymxc0" >> $TMP/etc/inittab +fi + +# add more vty +cat >> $TMP/etc/inittab <> $TMP/etc/network/interfaces + +sudo umount $TMP +rmdir $TMP diff --git a/tools/labs/qemu/qemu.sh b/tools/labs/qemu/qemu.sh new file mode 100755 index 00000000000000..6073942b8fff78 --- /dev/null +++ b/tools/labs/qemu/qemu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# This script runs qemu and creates a symbolic link named serial.pts +# to the qemu serial console (pts based). Because the qemu pts +# allocation is dynamic, it is preferable to have a stable path to +# avoid visual inspection of the qemu output when connecting to the +# serial console. + +SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}") + +case $ARCH in + x86) + qemu=qemu-system-i386 + ;; + arm) + qemu=qemu-system-arm + ;; +esac + +echo info chardev | nc -U -l qemu.mon | egrep --line-buffered -o "/dev/pts/[0-9]*" | xargs -I PTS ln -fs PTS serial.pts & +$qemu "$@" -monitor unix:qemu.mon +rm qemu.mon +rm serial.pts +$SCRIPT_DIR/cleanup-net.sh diff --git a/tools/labs/qemu/run-qemu.sh b/tools/labs/qemu/run-qemu.sh new file mode 100755 index 00000000000000..9938ec18ee3c50 --- /dev/null +++ b/tools/labs/qemu/run-qemu.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +QEMU_PIDFILE=$(mktemp) +SAMBA_DIR=$(mktemp -d) + +die() { echo "$0: error: $@" >&2; exit 1; } + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)" + +base_dir=${ROOT:-"$(readlink -f "$script_dir/..")"} +arch=${ARCH:-"x86"} +kernel=${ZIMAGE:-"$(readlink -f "$base_dir/../../arch/$arch/boot/bzImage")"} +rootfs=${ROOTFS:-"$(readlink -f "$base_dir/rootfs")"} +skels=${SKELS:-"$(readlink -f "$base_dir/skels")"} + +QEMU_BKGRND="-daemonize" +CHECKER=0 + 
+mode="${MODE:-console}" +case "$mode" in + console) + qemu_display="-display none" + linux_console="console=hvc0" + ;; + gui) + # QEMU_DISPLAY = sdl, gtk, ... + qemu_display="-display ${QEMU_DISPLAY:-"sdl"}" + linux_console="" + ;; + checker) + qemu_display="-display none" + QEMU_BKGRND="" + CHECKER=1 + ;; + *) echo "unknown mode '$MODE'" >&2; exit 1 ;; +esac + +case "$arch" in + x86) qemu_arch=i386 ;; + arm) qemu_arch=arm ;; + *) echo "unknown architecture '$arch'" >&2; exit 1 ;; +esac + +smbd=${SMBD:-"smbd"} + +qemu=${QEMU:-"qemu-system-$qemu_arch"} +if kvm-ok; then + qemu_kvm=${QEMU_KVM:-"-enable-kvm -cpu host"} +fi +qemu_cpus=${QEMU_CPUS:-"1"} +qemu_mem=${QEMU_MEM:-"512"} +qemu_display=${QEMU_DISPLAY:-"$qemu_display"} +qemu_addopts=${QEMU_ADD_OPTS:-""} +linux_console=${LINUX_CONSOLE:-"$linux_console"} +linux_loglevel=${LINUX_LOGLEVEL:-"15"} +linux_term=${LINUX_TERM:-"TERM=xterm"} +linux_addcmdline=${LINUX_ADD_CMDLINE:-""} + +linux_cmdline=${LINUX_CMDLINE:-"root=/dev/cifs rw ip=dhcp cifsroot=//10.0.2.1/rootfs,port=4450,guest,user=dummy $linux_console loglevel=$linux_loglevel pci=noacpi $linux_term $linux_addcmdline"} + +user=$(id -un) + +cat << EOF > "$SAMBA_DIR/smbd.conf" +[global] + interfaces = 10.0.2.1 + smb ports = 4450 + private dir = $SAMBA_DIR + bind interfaces only = yes + pid directory = $SAMBA_DIR + lock directory = $SAMBA_DIR + state directory = $SAMBA_DIR + cache directory = $SAMBA_DIR + ncalrpc dir = $SAMBA_DIR/ncalrpc + log file = $SAMBA_DIR/log.smbd + smb passwd file = $SAMBA_DIR/smbpasswd + security = user + map to guest = Bad User + load printers = no + printing = bsd + disable spoolss = yes + usershare max shares = 0 + + server min protocol = NT1 + unix extensions = yes + + server role = standalone server + public = yes + writeable = yes + #admin users = root + #create mask = 0777 + #directory mask = 0777 + force user = $user + force group = $user + + +[rootfs] + path = $rootfs +[skels] + path = $skels +EOF + +[ -x "$(command -v "$smbd")" ] 
|| die "samba ('$smbd') not found" +[ -x "$(command -v "$qemu")" ] || die "qemu ('$qemu') not found" + +mkdir -p "$skels" + +"$smbd" --no-process-group -s "$SAMBA_DIR/smbd.conf" -l "$SAMBA_DIR" >/dev/null 2>/dev/null & + +"$qemu" \ + $qemu_kvm \ + $QEMU_BKGRND \ + -pidfile $QEMU_PIDFILE \ + -device virtio-serial -chardev socket,id=virtiocon0,path=serial_console.socket,server,nowait -device virtconsole,chardev=virtiocon0 \ + -smp "$qemu_cpus" -m "$qemu_mem" \ + -no-reboot \ + -kernel "$kernel" \ + -append "$linux_cmdline" \ + -serial pipe:pipe1 \ + -serial pipe:pipe2 \ + -netdev tap,id=lkt-tap-smbd,ifname=lkt-tap-smbd,script=no,downscript=no -net nic,netdev=lkt-tap-smbd,model=virtio \ + -netdev tap,id=lkt-tap0,ifname=lkt-tap0,script=no,downscript=no -net nic,netdev=lkt-tap0,model=virtio \ + -netdev tap,id=lkt-tap1,ifname=lkt-tap1,script=no,downscript=no -net nic,netdev=lkt-tap1,model=i82559er \ + -drive file=disk0.img,if=virtio,format=raw \ + -drive file=disk1.img,if=virtio,format=raw \ + -drive file=disk2.img,if=virtio,format=raw \ + -gdb tcp::1234 \ + $qemu_display \ + $qemu_addopts + +sleep 2 +if [[ $CHECKER != 1 ]]; then + minicom -D unix\#serial_console.socket +fi + +# This seems to reset to the mode the terminal was prior to launching QEMU +# Inspired by +# https://github.com/landley/toybox/blob/990e0e7a40e4509c7987a190febe5d867f412af6/toys/other/reset.c#L26-L28 +# man 4 console_codes, ESC [ ? 7 h +printf '\e[?7h' + +echo "Cleaning up...Please wait!" 
+pkill -F $QEMU_PIDFILE +$script_dir/cleanup-net.sh +pkill -F ${SAMBA_DIR}/smbd.pid +sleep 1 +rm -rf $SAMBA_DIR +rm -f $QEMU_PIDFILE +rm -f serial_console.socket diff --git a/tools/labs/scripts/install-startup-script b/tools/labs/scripts/install-startup-script new file mode 100755 index 00000000000000..4df78826c1d98c --- /dev/null +++ b/tools/labs/scripts/install-startup-script @@ -0,0 +1,34 @@ +#!/bin/bash + +# Sample run: +# sudo ./install-startup-script ../clean-slate.core-image-minimal-qemux86.ext4 + +startup_script="so2-startup-script" + +if test $UID -ne 0; then + echo "You must be root to run this script." 1>&2 + exit 1 +fi + +if test $# -ne 1; then + echo "Usage: path/to/rootfs/image" 1>&2 + exit 1 +fi + +rootfs_image="$1" + +mkdir tmp_mnt +mount -t ext4 -o loop "$rootfs_image" tmp_mnt + +cp "$startup_script" tmp_mnt/etc/init.d/so2 +pushd tmp_mnt/etc/ > /dev/null 2>&1 +for i in rc*.d; do + cd "$i" + pwd + ln -sfn ../init.d/so2 S99so2 + cd .. +done +popd > /dev/null 2>&1 + +umount tmp_mnt +rmdir tmp_mnt diff --git a/tools/labs/scripts/so2-startup-script b/tools/labs/scripts/so2-startup-script new file mode 100755 index 00000000000000..3335210da90a7c --- /dev/null +++ b/tools/labs/scripts/so2-startup-script @@ -0,0 +1,36 @@ +#!/bin/sh + +echo "SO2 checker starts ..." > /dev/hvc0 +echo "SO2 checker starts ..." > /so2-checker.debug + +ip a a dev eth0 172.213.0.7/24 +ip l set dev eth0 up +sleep 1 + +modprobe netconsole netconsole=6666@172.213.0.7/eth0,6666@172.213.0.1/ + +dmesg -n 8 +dmesg -c /dev/null 2>&1 +sleep 3 + +echo "Starting tests ..." > /dev/hvc0 +echo "Starting tests ..." >> /so2-checker.debug +date > /dev/hvc0 +date >> /so2-checker.debug + +cd /home/root +/bin/sh ./run.sh > /dev/hvc0 + +echo "Testing complete." > /dev/hvc0 +echo "Testing complete." >> /so2-checker.debug +date > /dev/hvc0 +date >> /so2-checker.debug + +sleep 5 +rmmod netconsole + +sleep 3 +echo "All done. Shutting down ..." > /dev/hvc0 +echo "All done. Shutting down ..." 
>> /so2-checker.debug + +poweroff diff --git a/tools/labs/scripts/vmchecker-setup b/tools/labs/scripts/vmchecker-setup new file mode 100755 index 00000000000000..787b4a8f489d3c --- /dev/null +++ b/tools/labs/scripts/vmchecker-setup @@ -0,0 +1,12 @@ +#!/bin/bash + +if test ! -f ../initial.clean-slate.core-image-minimal-qemux86.ext4; then + wget http://downloads.yoctoproject.org/releases/yocto/yocto-2.3/machines/qemu/qemux86/core-image-minimal-qemux86.ext4 -O ../initial.clean-slate.core-image-minimal-qemux86.ext4 +fi +if test ! -f ../clean-slate.core-image-minimal-qemux86.ext4; then + cp ../initial.clean-slate.core-image-minimal-qemux86.ext4 ../clean-slate.core-image-minimal-qemux86.ext4 + sudo ./install-startup-script ../clean-slate.core-image-minimal-qemux86.ext4 +fi +if test ! -d ../out/; then + mkdir ../out/ +fi diff --git a/tools/labs/templates/api_assignment/checker/list-checker b/tools/labs/templates/api_assignment/checker/list-checker new file mode 100755 index 00000000000000..a57f7785b310cd --- /dev/null +++ b/tools/labs/templates/api_assignment/checker/list-checker @@ -0,0 +1,775 @@ +#!/bin/sh + +# +# List kernel API checker +# +# + +# Enable/disable debug (1/0). +DEBUG_=1 + +DEBUG() +{ + if test "x$DEBUG_" = "x1"; then + $@ 1>&2 + fi +} + +max_points=90 + +# Enable/disable exiting when program fails. +EXIT_IF_FAIL=0 + +test_do_fail() +{ + points=$1 + printf "failed [ 0/%02d]\n" "$max_points" + if test "x$EXIT_IF_FAIL" = "x1"; then + exit 1 + fi +} + +test_do_pass() +{ + points=$1 + printf "passed [%02d/%02d]\n" "$points" "$max_points" +} + +basic_test() +{ + message=$1 + points=$2 + shift 2 + test_command=$@ + + printf "%s" "$message" + + i=0 + limit=$((60 - ${#message})) + while test "$i" -lt "$limit"; do + printf "." + i=$(($i+1)) + done + + $test_command > /dev/null 2>&1 + if test $? 
-eq 0; then + test_do_pass "$points" + else + test_do_fail "$points" + fi +} + +module="list" +module_file="$module".ko +proc_folder="/proc/list" +preview="$proc_folder/preview" +management="$proc_folder/management" + +init_test() +{ + dmesg -c > /dev/null 2>&1 + insmod "$module_file" + if test $? -ne 0; then + echo "Error inserting module." 1>&2 + exit 1 + fi + sleep 1 +} + +cleanup_test() +{ + rmmod "$module" +} + +test_module_exists() +{ + init_test + + lsmod 2> /dev/null | grep -w list > /dev/null 2>&1 + basic_test "module_exists" 1 test "$?" -eq 0 + + cleanup_test +} + +test_proc_folder_exits() +{ + init_test + + basic_test "folder_exists" 1 test -d "$proc_folder" + + cleanup_test +} + +test_preview_exits() +{ + init_test + + basic_test "preview_exists" 1 test -f "$preview" + + cleanup_test +} + +test_management_exits() +{ + init_test + + basic_test "management_exists" 1 test -f "$management" + + cleanup_test +} + +test_preview_is_readable() +{ + init_test + + cat "$preview" > /dev/null 2>&1 + basic_test "preview_is_readable" 1 test $? -eq 0 + + cleanup_test +} + +test_preview_is_not_writable() +{ + init_test + + echo "hello" 2>&1 > "$preview" | grep "Input/output error" > /dev/null + basic_test "preview_is_not_writable" 1 test $? -eq 0 + + cleanup_test +} + +test_management_is_writable() +{ + init_test + + echo "hello" 2>&1 > "$management" | grep "Input/output error" > /dev/null + basic_test "management_is_writable" 1 test $? -ne 0 + + cleanup_test +} + +test_management_is_not_readable() +{ + init_test + + cat "$management" > /dev/null 2>&1 + basic_test "management_is_not_readable" 1 test $? -ne 0 + + cleanup_test +} + +test_addf_no_error() +{ + init_test + + echo "addf lorem" > "$management" 2> /dev/null + basic_test "addf_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_print() +{ + init_test + + echo "addf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "addf_print" 2 test $? 
-eq 0 + + cleanup_test +} + +test_addf_print_once() +{ + init_test + + echo "addf lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "addf_print_once" 2 test "$no" -eq 1 + + cleanup_test +} + +test_adde_no_error() +{ + init_test + + echo "adde lorem" > "$management" 2> /dev/null + basic_test "adde_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_print() +{ + init_test + + echo "adde lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "adde_print" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_print_once() +{ + init_test + + echo "adde lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "adde_print_once" 2 test "$no" -eq 1 + + cleanup_test +} + +test_addf_two_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "addf_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_two_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + cat "$preview" | grep "lorem" > /dev/null + ret1=$? + cat "$preview" | grep "ipsum" > /dev/null + ret2=$? + basic_test "addf_two_content" 2 test "$ret1" -eq 0 -a "$ret2" -eq 0 + + cleanup_test +} + +test_addf_same_twice_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "addf_same_twice_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_same_twice_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "addf_same_twice_content" 2 test $? 
-eq 0 + + cleanup_test +} + +test_addf_same_twice_content_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "addf_same_twice_content_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_two_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "adde_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_two_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + cat "$preview" | grep "lorem" > /dev/null + ret1=$? + cat "$preview" | grep "ipsum" > /dev/null + ret2=$? + basic_test "adde_two_content" 2 test "$ret1" -eq 0 -a "$ret2" -eq 0 + + cleanup_test +} + +test_adde_same_twice_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "adde_same_twice_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_same_twice_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "adde_same_twice_content" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_same_twice_content_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "adde_same_twice_content_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_front_after_addf() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + + # "ipsum" must be first. + cat "$preview" | head -n 1 | grep "ipsum" > /dev/null + basic_test "addf_front_after_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_adde() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + + # "ipsum" must be first. 
+ cat "$preview" | head -n 1 | grep "ipsum" > /dev/null + basic_test "addf_front_after_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_adde_addf() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + + # "dolor" must be first. + cat "$preview" | head -n 1 | grep "dolor" > /dev/null + basic_test "addf_front_after_adde_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_addf_adde() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "addf dolor" > "$management" + + # "dolor" must be first. + cat "$preview" | head -n 1 | grep "dolor" > /dev/null + basic_test "addf_front_after_addf_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_addf() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + + # "ipsum" must be last. + cat "$preview" | tail -n -1 | grep "ipsum" > /dev/null + basic_test "adde_end_after_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_adde() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + + # "ipsum" must be last. + cat "$preview" | tail -n -1 | grep "ipsum" > /dev/null + basic_test "adde_end_after_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_adde_addf() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "adde dolor" > "$management" + + # "dolor" must be last. + cat "$preview" | tail -n -1 | grep "dolor" > /dev/null + basic_test "adde_end_after_adde_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_addf_adde() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + + # "dolor" must be last. + cat "$preview" | tail -n -1 | grep "dolor" > /dev/null + basic_test "adde_end_after_addf_adde" 2 test $? 
-eq 0 + + cleanup_test +} + +test_delf_no_error() +{ + init_test + + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + basic_test "delf_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_delf_removes_front() +{ + init_test + + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_removes_front" 2 test "$no" -eq 0 + + cleanup_test +} + +test_delf_removes_end() +{ + init_test + + echo "adde lorem" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_removes_end" 2 test "$no" -eq 0 + + cleanup_test +} + +test_delf_front_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_front_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_delf_front_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "delf_front_content" 2 test $? -ne 0 + + cleanup_test +} + +test_delf_first() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "delf_first" 2 test $? 
-ne 0 + + cleanup_test +} + +test_delf_none() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "delf dolor" > "$management" 2> /dev/null + no=$(cat "$preview" | wc -l) + basic_test "delf_none" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_one_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "dela ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_one_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_one_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "dela ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "dela_one_content" 2 test $? -ne 0 + + cleanup_test +} + +test_dela_two_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "adde ipsum" > "$management" + echo "dela ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_two_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "adde ipsum" > "$management" + echo "dela ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "dela_two_content" 2 test $? 
-ne 0 + + cleanup_test +} + +test_dela_all_five_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "dela lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_all_five_number" 2 test "$no" -eq 0 + + cleanup_test +} + +test_dela_all_five_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "dela lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "dela_all_five_content" 2 test $? -ne 0 + + cleanup_test +} + +test_dela_none() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "dela dolor" > "$management" 2> /dev/null + no=$(cat "$preview" | wc -l) + basic_test "dela_none" 2 test "$no" -eq 2 + + cleanup_test +} + +test_mix_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + echo "addf sit" > "$management" + echo "adde sit" > "$management" + echo "addf sit" > "$management" + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + echo "delf dolor" > "$management" + echo "adde dolor" > "$management" + echo "dela sit" > "$management" + echo "delf ipsum" > "$management" + echo "delf ipsum" > "$management" + echo "dela dolor" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "mix_number" 3 test "$no" -eq 1 + + cleanup_test +} + +test_mix_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + echo "addf sit" > "$management" + echo "adde sit" > "$management" + echo "addf sit" > "$management" + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + echo "delf 
dolor" > "$management" + echo "adde dolor" > "$management" + echo "dela sit" > "$management" + echo "delf ipsum" > "$management" + echo "delf ipsum" > "$management" + echo "dela dolor" > "$management" + cat "$preview" | grep "lorem" > /dev/null # "lorem" must survive the sequence + basic_test "mix_content" 3 test $? -eq 0 + + cleanup_test +} + +run_tests() +{ + test_module_exists + test_proc_folder_exits + test_preview_exits + test_management_exits + test_preview_is_readable + test_preview_is_not_writable + test_management_is_writable + test_management_is_not_readable + test_addf_no_error + test_addf_print + test_addf_print_once + test_adde_no_error + test_adde_print + test_adde_print_once + test_addf_two_number + test_addf_two_content + test_addf_same_twice_number + test_addf_same_twice_content + test_addf_same_twice_content_number + test_adde_two_number + test_adde_two_content + test_adde_same_twice_number + test_adde_same_twice_content + test_adde_same_twice_content_number + test_addf_front_after_addf + test_addf_front_after_adde + test_addf_front_after_adde_addf + test_addf_front_after_addf_adde + test_adde_end_after_addf + test_adde_end_after_adde + test_adde_end_after_adde_addf + test_adde_end_after_addf_adde + test_delf_no_error + test_delf_removes_front + test_delf_removes_end + test_delf_front_number + test_delf_front_content + test_delf_first + test_delf_none + test_dela_one_number + test_dela_one_content + test_dela_two_number + test_dela_two_content + test_dela_all_five_number + test_dela_all_five_content + test_dela_none + test_mix_number + test_mix_content +} + +run_tests | tee results.txt + +cat results.txt | grep '\[.*\]$' | awk -F '[] /[]+' ' +BEGIN { + sum=0 +} + +{ + sum += $(NF-2); +} + +END { + printf "\n%66s [%02d/90]\n", "Total:", sum; +}' + +rm results.txt diff --git a/tools/labs/templates/api_assignment/kernel-api/Kbuild b/tools/labs/templates/api_assignment/kernel-api/Kbuild new file mode 100644 index 00000000000000..5e45a816841549 --- /dev/null +++ 
b/tools/labs/templates/api_assignment/kernel-api/Kbuild @@ -0,0 +1 @@ +obj-m = list.o diff --git a/tools/labs/templates/api_assignment/kernel-api/list.c b/tools/labs/templates/api_assignment/kernel-api/list.c new file mode 100644 index 00000000000000..39db4cef953ad2 --- /dev/null +++ b/tools/labs/templates/api_assignment/kernel-api/list.c @@ -0,0 +1,117 @@ +/* + * list.c - Linux kernel list API + * + * TODO 1/0: Fill in name / email + * Author: FirstName LastName + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define PROCFS_MAX_SIZE 1024 + +#define procfs_dir_name "list" +#define procfs_file_read "preview" +#define procfs_file_write "management" + +struct proc_dir_entry *proc_list; +struct proc_dir_entry *proc_list_read; +struct proc_dir_entry *proc_list_write; + +/* TODO 2/0: define your list! */ + +static int list_proc_show(struct seq_file *m, void *v) +{ + /* TODO 3/0: print your list. One element / line. */ + seq_puts(m, "Remove this line\n"); + + return 0; +} + +static int list_read_open(struct inode *inode, struct file *file) +{ + return single_open(file, list_proc_show, NULL); +} + +static int list_write_open(struct inode *inode, struct file *file) +{ + return single_open(file, list_proc_show, NULL); +} + +static ssize_t list_write(struct file *file, const char __user *buffer, + size_t count, loff_t *offs) +{ + char local_buffer[PROCFS_MAX_SIZE]; + unsigned long local_buffer_size = 0; + + local_buffer_size = count; + if (local_buffer_size > PROCFS_MAX_SIZE - 1) + local_buffer_size = PROCFS_MAX_SIZE - 1; /* keep the trailing NUL */ + + memset(local_buffer, 0, PROCFS_MAX_SIZE); + if (copy_from_user(local_buffer, buffer, local_buffer_size)) + return -EFAULT; + + /* local_buffer contains your command written in /proc/list/management + * TODO 4/0: parse the command and add/delete elements. 
+ */ + + return local_buffer_size; +} + +static const struct file_operations r_fops = { + .owner = THIS_MODULE, + .open = list_read_open, + .read = seq_read, + .release = single_release, +}; + +static const struct file_operations w_fops = { + .owner = THIS_MODULE, + .open = list_write_open, + .write = list_write, + .release = single_release, +}; + +static int list_init(void) +{ + proc_list = proc_mkdir(procfs_dir_name, NULL); + if (!proc_list) + return -ENOMEM; + + proc_list_read = proc_create(procfs_file_read, 0000, proc_list, + &r_fops); + if (!proc_list_read) + goto proc_list_cleanup; + + proc_list_write = proc_create(procfs_file_write, 0000, proc_list, + &w_fops); + if (!proc_list_write) + goto proc_list_read_cleanup; + + return 0; + +proc_list_read_cleanup: + proc_remove(proc_list_read); +proc_list_cleanup: + proc_remove(proc_list); + return -ENOMEM; +} + +static void list_exit(void) +{ + proc_remove(proc_list); +} + +module_init(list_init); +module_exit(list_exit); + +MODULE_DESCRIPTION("Linux kernel list API"); +/* TODO 5/0: Fill in your name / email address */ +MODULE_AUTHOR("FirstName LastName +#include +#include + +MODULE_DESCRIPTION("Hello World"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +static int hello_world_init(void) +{ + pr_info("Hello ARM World!\n"); + + return 0; +} + +static void hello_world_exit(void) +{ + pr_info("Going out ARM world!\n"); +} + +module_init(hello_world_init); +module_exit(hello_world_exit); diff --git a/tools/labs/templates/arm_kernel_development/5-simple-driver/Kbuild b/tools/labs/templates/arm_kernel_development/5-simple-driver/Kbuild new file mode 100644 index 00000000000000..cef64bd67233a9 --- /dev/null +++ b/tools/labs/templates/arm_kernel_development/5-simple-driver/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = simple_driver.o diff --git a/tools/labs/templates/arm_kernel_development/5-simple-driver/simple_driver.c 
b/tools/labs/templates/arm_kernel_development/5-simple-driver/simple_driver.c new file mode 100644 index 00000000000000..5f220a96aee47a --- /dev/null +++ b/tools/labs/templates/arm_kernel_development/5-simple-driver/simple_driver.c @@ -0,0 +1,63 @@ +/* + * ARM Kernel Development + * + * simple_driver.c - Simple platform driver to demonstrate device + * probing + */ + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple driver"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +static const struct of_device_id simple_device_ids[] = { + /* TODO 2/2: Add compatible strings */ + { .compatible = "so2,simple-device-v1"}, + { .compatible = "so2,simple-device-v2"}, + { /* sentinel */} +}; + +static int simple_probe(struct platform_device *pdev) +{ + pr_info("simple_probe() %pOF\n", pdev->dev.of_node); + + return 0; +} + +static int simple_remove(struct platform_device *pdev) +{ + pr_info("simple_remove()\n"); + + return 0; +} + +struct platform_driver simple_driver = { + .probe = simple_probe, + .remove = simple_remove, + .driver = { + .name = "simple_driver", + .of_match_table = simple_device_ids, + }, +}; + +static int simple_init(void) +{ + pr_info("Simple driver init!\n"); + + /* TODO 1/0: Notice simple_driver definition */ + return platform_driver_register(&simple_driver); +} + +static void simple_exit(void) +{ + pr_info("Simple driver exit\n"); + + platform_driver_unregister(&simple_driver); +} + +module_init(simple_init); +module_exit(simple_exit); diff --git a/tools/labs/templates/assignments/0-list/Kbuild b/tools/labs/templates/assignments/0-list/Kbuild new file mode 100644 index 00000000000000..5e45a816841549 --- /dev/null +++ b/tools/labs/templates/assignments/0-list/Kbuild @@ -0,0 +1 @@ +obj-m = list.o diff --git a/tools/labs/templates/assignments/0-list/checker/_checker b/tools/labs/templates/assignments/0-list/checker/_checker new file mode 100755 index 00000000000000..4f15f8846818ad --- /dev/null +++ 
b/tools/labs/templates/assignments/0-list/checker/_checker @@ -0,0 +1,775 @@ +#!/bin/sh + +# +# List kernel API checker +# +# + +# Enable/disable debug (1/0). +DEBUG_=1 + +DEBUG() +{ + if test "x$DEBUG_" = "x1"; then + $@ 1>&2 + fi +} + +max_points=90 + +# Enable/disable exiting when program fails. +EXIT_IF_FAIL=0 + +test_do_fail() +{ + points=$1 + printf "failed [00/%02d]\n" "$max_points" + if test "x$EXIT_IF_FAIL" = "x1"; then + exit 1 + fi +} + +test_do_pass() +{ + points=$1 + printf "passed [%02d/%02d]\n" "$points" "$max_points" +} + +basic_test() +{ + message=$1 + points=$2 + shift 2 + test_command=$@ + + printf "%s" "$message" + + i=0 + limit=$((60 - ${#message})) + while test "$i" -lt "$limit"; do + printf "." + i=$(($i+1)) + done + + $test_command > /dev/null 2>&1 + if test $? -eq 0; then + test_do_pass "$points" + else + test_do_fail "$points" + fi +} + +module="list" +module_file="$module".ko +proc_folder="/proc/list" +preview="$proc_folder/preview" +management="$proc_folder/management" + +init_test() +{ + dmesg -c > /dev/null 2>&1 + insmod "$module_file" + if test $? -ne 0; then + echo "Error inserting module." 1>&2 + exit 1 + fi + sleep 1 +} + +cleanup_test() +{ + rmmod "$module" +} + +test_module_exists() +{ + init_test + + lsmod 2> /dev/null | grep -w list > /dev/null 2>&1 + basic_test "module_exists" 1 test "$?" -eq 0 + + cleanup_test +} + +test_proc_folder_exits() +{ + init_test + + basic_test "folder_exists" 1 test -d "$proc_folder" + + cleanup_test +} + +test_preview_exits() +{ + init_test + + basic_test "preview_exists" 1 test -f "$preview" + + cleanup_test +} + +test_management_exits() +{ + init_test + + basic_test "management_exists" 1 test -f "$management" + + cleanup_test +} + +test_preview_is_readable() +{ + init_test + + cat "$preview" > /dev/null 2>&1 + basic_test "preview_is_readable" 1 test $? 
-eq 0 + + cleanup_test +} + +test_preview_is_not_writable() +{ + init_test + + echo "hello" 2>&1 > "$preview" | grep "Input/output error" > /dev/null + basic_test "preview_is_not_writable" 1 test $? -eq 0 + + cleanup_test +} + +test_management_is_writable() +{ + init_test + + echo "hello" 2>&1 > "$management" | grep "Input/output error" > /dev/null + basic_test "management_is_writable" 1 test $? -ne 0 + + cleanup_test +} + +test_management_is_not_readable() +{ + init_test + + cat "$management" > /dev/null 2>&1 + basic_test "management_is_not_readable" 1 test $? -ne 0 + + cleanup_test +} + +test_addf_no_error() +{ + init_test + + echo "addf lorem" > "$management" 2> /dev/null + basic_test "addf_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_print() +{ + init_test + + echo "addf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "addf_print" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_print_once() +{ + init_test + + echo "addf lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "addf_print_once" 2 test "$no" -eq 1 + + cleanup_test +} + +test_adde_no_error() +{ + init_test + + echo "adde lorem" > "$management" 2> /dev/null + basic_test "adde_no_error" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_print() +{ + init_test + + echo "adde lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "adde_print" 2 test $? 
-eq 0 + + cleanup_test +} + +test_adde_print_once() +{ + init_test + + echo "adde lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "adde_print_once" 2 test "$no" -eq 1 + + cleanup_test +} + +test_addf_two_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "addf_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_two_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + cat "$preview" | grep "lorem" > /dev/null + ret1=$? + cat "$preview" | grep "ipsum" > /dev/null + ret2=$? + basic_test "addf_two_content" 2 test "$ret1" -eq 0 -a "$ret2" -eq 0 + + cleanup_test +} + +test_addf_same_twice_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "addf_same_twice_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_same_twice_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "addf_same_twice_content" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_same_twice_content_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "addf_same_twice_content_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_two_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "adde_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_two_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + cat "$preview" | grep "lorem" > /dev/null + ret1=$? + cat "$preview" | grep "ipsum" > /dev/null + ret2=$? 
+ basic_test "adde_two_content" 2 test "$ret1" -eq 0 -a "$ret2" -eq 0 + + cleanup_test +} + +test_adde_same_twice_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "adde_same_twice_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_adde_same_twice_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "adde_same_twice_content" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_same_twice_content_number() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde lorem" > "$management" + no=$(cat "$preview" | grep "lorem" | wc -l) + basic_test "adde_same_twice_content_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_addf_front_after_addf() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + + # "ipsum" must be first. + cat "$preview" | head -n 1 | grep "ipsum" > /dev/null + basic_test "addf_front_after_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_adde() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + + # "ipsum" must be first. + cat "$preview" | head -n 1 | grep "ipsum" > /dev/null + basic_test "addf_front_after_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_adde_addf() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + + # "dolor" must be first. + cat "$preview" | head -n 1 | grep "dolor" > /dev/null + basic_test "addf_front_after_adde_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_addf_front_after_addf_adde() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "addf dolor" > "$management" + + # "dolor" must be first. 
+ cat "$preview" | head -n 1 | grep "dolor" > /dev/null + basic_test "addf_front_after_addf_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_addf() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + + # "ipsum" must be last. + cat "$preview" | tail -n -1 | grep "ipsum" > /dev/null + basic_test "adde_end_after_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_adde() +{ + init_test + + echo "adde lorem" > "$management" + echo "adde ipsum" > "$management" + + # "ipsum" must be last. + cat "$preview" | tail -n -1 | grep "ipsum" > /dev/null + basic_test "adde_end_after_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_adde_addf() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "adde dolor" > "$management" + + # "dolor" must be last. + cat "$preview" | tail -n -1 | grep "dolor" > /dev/null + basic_test "adde_end_after_adde_addf" 2 test $? -eq 0 + + cleanup_test +} + +test_adde_end_after_addf_adde() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + + # "dolor" must be last. + cat "$preview" | tail -n -1 | grep "dolor" > /dev/null + basic_test "adde_end_after_addf_adde" 2 test $? -eq 0 + + cleanup_test +} + +test_delf_no_error() +{ + init_test + + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + basic_test "delf_no_error" 2 test $? 
-eq 0 + + cleanup_test +} + +test_delf_removes_front() +{ + init_test + + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_removes_front" 2 test "$no" -eq 0 + + cleanup_test +} + +test_delf_removes_end() +{ + init_test + + echo "adde lorem" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_removes_end" 2 test "$no" -eq 0 + + cleanup_test +} + +test_delf_front_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "delf_front_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_delf_front_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "delf_front_content" 2 test $? -ne 0 + + cleanup_test +} + +test_delf_first() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "delf ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "delf_first" 2 test $? 
-ne 0 + + cleanup_test +} + +test_delf_none() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "delf dolor" > "$management" 2> /dev/null + no=$(cat "$preview" | wc -l) + basic_test "delf_none" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_one_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "dela ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_one_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_one_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "dela ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "dela_one_content" 2 test $? -ne 0 + + cleanup_test +} + +test_dela_two_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "adde ipsum" > "$management" + echo "dela ipsum" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_two_number" 2 test "$no" -eq 2 + + cleanup_test +} + +test_dela_two_content() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "addf dolor" > "$management" + echo "adde ipsum" > "$management" + echo "dela ipsum" > "$management" + cat "$preview" | grep "ipsum" > /dev/null + basic_test "dela_two_content" 2 test $? 
-ne 0 + + cleanup_test +} + +test_dela_all_five_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "dela lorem" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "dela_all_five_number" 2 test "$no" -eq 0 + + cleanup_test +} + +test_dela_all_five_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "adde lorem" > "$management" + echo "addf lorem" > "$management" + echo "dela lorem" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "dela_all_five_content" 2 test $? -ne 0 + + cleanup_test +} + +test_dela_none() +{ + init_test + + echo "adde lorem" > "$management" + echo "addf ipsum" > "$management" + echo "dela dolor" > "$management" 2> /dev/null + no=$(cat "$preview" | wc -l) + basic_test "dela_none" 2 test "$no" -eq 2 + + cleanup_test +} + +test_mix_number() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + echo "addf sit" > "$management" + echo "adde sit" > "$management" + echo "addf sit" > "$management" + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + echo "delf dolor" > "$management" + echo "adde dolor" > "$management" + echo "dela sit" > "$management" + echo "delf ipsum" > "$management" + echo "delf ipsum" > "$management" + echo "dela dolor" > "$management" + no=$(cat "$preview" | wc -l) + basic_test "mix_number" 3 test "$no" -eq 1 + + cleanup_test +} + +test_mix_content() +{ + init_test + + echo "addf lorem" > "$management" + echo "adde ipsum" > "$management" + echo "adde dolor" > "$management" + echo "addf sit" > "$management" + echo "adde sit" > "$management" + echo "addf sit" > "$management" + echo "addf lorem" > "$management" + echo "delf lorem" > "$management" + echo "delf 
dolor" > "$management" + echo "adde dolor" > "$management" + echo "dela sit" > "$management" + echo "delf ipsum" > "$management" + echo "delf ipsum" > "$management" + echo "dela dolor" > "$management" + cat "$preview" | grep "lorem" > /dev/null + basic_test "mix_content" 3 test $? -eq 0 + + cleanup_test +} + +run_tests() +{ + test_module_exists + test_proc_folder_exits + test_preview_exits + test_management_exits + test_preview_is_readable + test_preview_is_not_writable + test_management_is_writable + test_management_is_not_readable + test_addf_no_error + test_addf_print + test_addf_print_once + test_adde_no_error + test_adde_print + test_adde_print_once + test_addf_two_number + test_addf_two_content + test_addf_same_twice_number + test_addf_same_twice_content + test_addf_same_twice_content_number + test_adde_two_number + test_adde_two_content + test_adde_same_twice_number + test_adde_same_twice_content + test_adde_same_twice_content_number + test_addf_front_after_addf + test_addf_front_after_adde + test_addf_front_after_adde_addf + test_addf_front_after_addf_adde + test_adde_end_after_addf + test_adde_end_after_adde + test_adde_end_after_adde_addf + test_adde_end_after_addf_adde + test_delf_no_error + test_delf_removes_front + test_delf_removes_end + test_delf_front_number + test_delf_front_content + test_delf_first + test_delf_none + test_dela_one_number + test_dela_one_content + test_dela_two_number + test_dela_two_content + test_dela_all_five_number + test_dela_all_five_content + test_dela_none + test_mix_number + test_mix_content +} + +run_tests | tee results.txt + +cat results.txt | grep '\[.*\]$' | awk -F '[] /[]+' ' +BEGIN { + sum=0 +} + +{ + sum += $(NF-2); +} + +END { + printf "\n%66s [%02d/90]\n", "Total:", sum; +}' + +rm results.txt diff --git a/tools/labs/templates/assignments/0-list/list.c b/tools/labs/templates/assignments/0-list/list.c new file mode 100644 index 00000000000000..ec31196bb7ba74 --- /dev/null +++ 
b/tools/labs/templates/assignments/0-list/list.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * list.c - Linux kernel list API + * + * TODO 1/0: Fill in name / email + * Author: FirstName LastName + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define PROCFS_MAX_SIZE 512 + +#define procfs_dir_name "list" +#define procfs_file_read "preview" +#define procfs_file_write "management" + +struct proc_dir_entry *proc_list; +struct proc_dir_entry *proc_list_read; +struct proc_dir_entry *proc_list_write; + +/* TODO 2/0: define your list! */ + +static int list_proc_show(struct seq_file *m, void *v) +{ + /* TODO 3/0: print your list. One element / line. */ + seq_puts(m, "Remove this line\n"); + + return 0; +} + +static int list_read_open(struct inode *inode, struct file *file) +{ + return single_open(file, list_proc_show, NULL); +} + +static int list_write_open(struct inode *inode, struct file *file) +{ + return single_open(file, list_proc_show, NULL); +} + +static ssize_t list_write(struct file *file, const char __user *buffer, + size_t count, loff_t *offs) +{ + char local_buffer[PROCFS_MAX_SIZE]; + unsigned long local_buffer_size = 0; + + local_buffer_size = count; + if (local_buffer_size > PROCFS_MAX_SIZE) + local_buffer_size = PROCFS_MAX_SIZE; + + memset(local_buffer, 0, PROCFS_MAX_SIZE); + if (copy_from_user(local_buffer, buffer, local_buffer_size)) + return -EFAULT; + + /* local_buffer contains your command written in /proc/list/management + * TODO 4/0: parse the command and add/delete elements. 
+ */ + + return local_buffer_size; +} + +static const struct proc_ops r_pops = { + .proc_open = list_read_open, + .proc_read = seq_read, + .proc_release = single_release, +}; + +static const struct proc_ops w_pops = { + .proc_open = list_write_open, + .proc_write = list_write, + .proc_release = single_release, +}; + +static int list_init(void) +{ + proc_list = proc_mkdir(procfs_dir_name, NULL); + if (!proc_list) + return -ENOMEM; + + proc_list_read = proc_create(procfs_file_read, 0000, proc_list, + &r_pops); + if (!proc_list_read) + goto proc_list_cleanup; + + proc_list_write = proc_create(procfs_file_write, 0000, proc_list, + &w_pops); + if (!proc_list_write) + goto proc_list_read_cleanup; + + return 0; + +proc_list_read_cleanup: + proc_remove(proc_list_read); +proc_list_cleanup: + proc_remove(proc_list); + return -ENOMEM; +} + +static void list_exit(void) +{ + proc_remove(proc_list); +} + +module_init(list_init); +module_exit(list_exit); + +MODULE_DESCRIPTION("Linux kernel list API"); +/* TODO 5/0: Fill in your name / email address */ +MODULE_AUTHOR("FirstName LastName "); +MODULE_LICENSE("GPL v2"); diff --git a/tools/labs/templates/assignments/00-hello/Kbuild b/tools/labs/templates/assignments/00-hello/Kbuild new file mode 100644 index 00000000000000..9de37409e9e7c6 --- /dev/null +++ b/tools/labs/templates/assignments/00-hello/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g -m32 + +obj-m = hello-world.o diff --git a/tools/labs/templates/assignments/00-hello/checker/_checker b/tools/labs/templates/assignments/00-hello/checker/_checker new file mode 100755 index 00000000000000..4b8a0869da2adf --- /dev/null +++ b/tools/labs/templates/assignments/00-hello/checker/_checker @@ -0,0 +1,12 @@ +#!/bin/sh + +/bin/dmesg -c > /dev/null 2>&1 +/sbin/rmmod hello-world > /dev/null 2>&1 +/sbin/insmod hello-world.ko +/bin/dmesg | grep 'Hello, World!' > /dev/null 2>&1 +if test $? -eq 0; then + echo "Test PASSED." +else + echo "Test FAILED." 
+fi +/sbin/rmmod hello-world > /dev/null 2>&1 diff --git a/tools/labs/templates/assignments/00-hello/hello-world.c b/tools/labs/templates/assignments/00-hello/hello-world.c new file mode 100644 index 00000000000000..c8c067b1ddf0ad --- /dev/null +++ b/tools/labs/templates/assignments/00-hello/hello-world.c @@ -0,0 +1,23 @@ +#include +#include +#include + +MODULE_DESCRIPTION("Hello World"); +MODULE_AUTHOR("Psoru Lesfo Rever"); +MODULE_LICENSE("GPL"); + + +static int hello_init(void) +{ + /* TODO: Print "Hello, World!" */ + pr_info("Hello, World!\n"); + + return 0; +} + +static void hello_exit(void) +{ +} + +module_init(hello_init); +module_exit(hello_exit); diff --git a/tools/labs/templates/assignments/1-tracer/checker/Makefile b/tools/labs/templates/assignments/1-tracer/checker/Makefile new file mode 100644 index 00000000000000..ed86666ec4df3c --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/Makefile @@ -0,0 +1,19 @@ +LDFLAGS = -m32 + +.PHONY: all clean + +all: tracer_test + +tracer_test: tracer_test.o + +tracer_test.o: _test/tracer_test.c + make -C _test + ln -sfn _test/tracer_test.o $@ + +run: all + ./_checker + +clean: + -rm -f *~ + -rm -f tracer_test.o tracer_test + -make -C _test clean diff --git a/tools/labs/templates/assignments/1-tracer/checker/README b/tools/labs/templates/assignments/1-tracer/checker/README new file mode 100644 index 00000000000000..75c90417a94fb6 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/README @@ -0,0 +1,113 @@ += KPROBE BASED TRACER TEST SUITE == + +Test suite for kprobe based tracer + +== FILES == + +README + * this file + +Makefile + * Makefile for automating the build process + +_checker + * script to run all tests defined in _test/tracer_test.c + +_test/Makefile + * test suite internal Makefile (creates necessary object files) + +_test/tracer_test.c + * test suite for Kprobe Based Tracer + +_test/tracer_test.h + * test suite header file + +_test/tracer.h + * kprobe tracer header file 
(macros and structures) + +_test/test.h + * useful macros for testing + +_test/debug.h + * debugging macros + +_test/util.h + * useful macros for generic use (error processing) + +_helper/* + * helper kernel module for simulating kprobed ops + +== BUILDING == + +Use the linux-kernel-labs build infrastructure, i.e. run + + make build + +in the tools/labs/ folder. + +If you want to do things by hand, you have to use the Makefile in the +current folder to build the executable: + + make + +and the kernel module-specific Makefile command to build the kernel module: + +make -C $(KDIR) M=$(KDIR)/tools/labs/skels/assignments/1-tracer/checker/_helper/ ARCH=x86 modules + +== RUNNING == + +Use the linux-kernel-labs run infrastructure, i.e. run + + make copy + +This copies the skel/ subfolder into the future root filesystem image for the +QEMU/KVM virtual machine. + +If you want to do things by hand, copy your tracer.ko module and _checker, +tracer_test and tracer_helper.ko to the fsimg/root directory on your QEMU/KVM +virtual machine. + +In order to run the test suite you can either use the _checker +script or run the tracer_test executable. + +The _checker script runs all tests and computes the assignment grade: + + ./_checker + +In order to run a specific test pass the test number (1 .. 10) to the +tracer_test executable. + + ./tracer_test 5 + +== TESTS == + +Tests are basically unit tests. A single function in the test_fun_array (see +tracer_test.c) is called each time the tracer_test executable is invoked, +testing a single functionality (and assuming previous tests have been run and +passed). + +The EXIT_IF_FAIL macro (see test.h) is unnecessary since after each test, the +program completes. + +Each test function follows the unit test pattern: initialization, action, +evaluation. The test macro (see test.h) is invoked at the end of each test +for evaluating and grading the test. 
+ +== DEBUGGING == + +The debug.h header file consists of several macros useful for debugging +(dprintf, dlog). There are multiple uses of these macros throughout the above +files. + +In order to turn debug messages on, you must define the DEBUG macro, either in +a header file, or, I suggest, in the Makefile. The LOG_LEVEL macro limits the +log message types that are to be printed, by default LOG_WARNING (see enum in +debug.h). You may redefine it in a header file or in the Makefile. + +Rapid enabling of debug messages is achieved by uncommenting the CPPFLAGS +line in the Makefile. It turns on debugging and enables all log messages +(LOG_DEBUG). + +== OTHER == + +srand48() and drand48() are used for generating random numbers. diff --git a/tools/labs/templates/assignments/1-tracer/checker/_checker b/tools/labs/templates/assignments/1-tracer/checker/_checker new file mode 100755 index 00000000000000..f5a4a7da1b5d26 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_checker @@ -0,0 +1,24 @@ +#!/bin/sh + +first_test=1 +last_test=10 +executable=tracer_test + +for i in $(seq $first_test $last_test); do + ./"$executable" $i +done | tee results.txt + +cat results.txt | grep '\[.*\]$' | awk -F '[] /[]+' ' +BEGIN { + sum=0 +} + +{ + sum += $2; +} + +END { + printf "\n%66s [%03d/100]\n", "Total:", sum; +}' + +rm -f results.txt diff --git a/tools/labs/templates/assignments/1-tracer/checker/_helper/Kbuild b/tools/labs/templates/assignments/1-tracer/checker/_helper/Kbuild new file mode 100644 index 00000000000000..7a7b493cfd9a0f --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_helper/Kbuild @@ -0,0 +1,4 @@ +EXTRA_CFLAGS = -Wall -g + +obj-m = tracer_helper.o + diff --git a/tools/labs/templates/assignments/1-tracer/checker/_helper/helper.h b/tools/labs/templates/assignments/1-tracer/checker/_helper/helper.h new file mode 100644 index 00000000000000..3cb30e03c9dcb9 --- /dev/null +++ 
b/tools/labs/templates/assignments/1-tracer/checker/_helper/helper.h @@ -0,0 +1,35 @@ +#ifndef _HELPER__ +#define _HELPER__ + +#include + +#define NAMESIZE 64 +#define MCOUNT 128 + +#define PREPARE_TEST _IOW('t', 19, unsigned int) +#define START_TEST _IOW('t', 20, unsigned int) +#define STOP_TEST _IOW('t', 21, unsigned int) + +/*XXX match test_params with tracers_stats + * perhaps use the same struct + */ +struct test_params { + pid_t pid; + char thread_name[NAMESIZE]; + int idx; /* index for multi-kthreaded test */ + /* + * kcalls: 5 + * alloc : [1024] [8] [128] [10] [128] + * free : [0] [0] [1] [0] [1] + */ + int kcalls; /* number of kmalloc calls */ + int alloc[MCOUNT]; /* sizes of kmalloc allocations */ + int free[MCOUNT]; /* intmap for which allocations to free */ + int sched; + int up; + int down; + int lock; + int unlock; +}; + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/checker/_helper/tracer_helper.c b/tools/labs/templates/assignments/1-tracer/checker/_helper/tracer_helper.c new file mode 100644 index 00000000000000..1c93d230c45511 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_helper/tracer_helper.c @@ -0,0 +1,201 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "helper.h" + + +#define HELPER_MINOR 30 + +/* number of paralel kernel threads */ +static wait_queue_head_t wq[MCOUNT]; + +/*XXX: this looks like a poor design, please refactor */ +static int thread_prepared[MCOUNT] = {0, }; +static int thread_running[MCOUNT] = {0, }; +static int thread_should_stop[MCOUNT] = {0, }; +static struct task_struct *t[MCOUNT] = {NULL, }; + +static struct test_params tp[MCOUNT]; +static int tcount; + +void do_work(void) +{ + int i, j; + int a = 0; + + for (i = 0; i < 1000; i++) + for (j = 0; j < 1000; j++) + a = i * j; +} + +int thread_fn(void *data) +{ + int i; + + void *k_addr[MCOUNT]; + struct semaphore 
sem; + struct mutex lock; + + struct test_params *tp; + + tp = (struct test_params *)data; + + thread_prepared[tp->idx] = 1; + wake_up_interruptible(&wq[tp->idx]); + + wait_event_interruptible(wq[tp->idx], thread_running[tp->idx] == 1); + + for (i = 0; i < tp->kcalls; i++) + k_addr[i] = kmalloc(tp->alloc[i], GFP_KERNEL); + + /*XXX: do proper cleanup, avoid memory leaks */ + for (i = 0; i < tp->kcalls; i++) + if (tp->free[i] && k_addr[i]) + kfree(k_addr[i]); + + for (i = 0; i < tp->sched; i++) + schedule(); + + /* ***: use tp->down for down_interruptible */ + sema_init(&sem, 1); + for (i = 0; i < tp->up; i++) { + up(&sem); + do_work(); + down_interruptible(&sem); + } + /* ***: use tp->unlock for mutex_unlock */ + mutex_init(&lock); + for (i = 0; i < tp->lock; i++) { + mutex_lock(&lock); + do_work(); + mutex_unlock(&lock); + } + + wait_event_interruptible(wq[tp->idx], thread_should_stop[tp->idx] == 1); + + /* reset state machine */ + thread_prepared[tp->idx] = 0; + thread_running[tp->idx] = 0; + thread_should_stop[tp->idx] = 0; + + return 0; +} +static int helper_open(struct inode *inode, struct file *file) +{ +#ifdef DEBUG + pr_info("tracer-helper: open\n"); +#endif + return 0; +} + +static int helper_release(struct inode *inode, struct file *file) +{ +#ifdef DEBUG + pr_info("tracer-helper: close\n"); +#endif + return 0; +} + +static long helper_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + int ret = 0; + + switch (cmd) { + case PREPARE_TEST: + if (copy_from_user(&tp[tcount], (void __user *)arg, + sizeof(tp[tcount]))) { + pr_info("Error copy from user\n"); + return -EFAULT; + } + t[tp[tcount].idx] = kthread_run(thread_fn, &tp[tcount], "%s", + tp[tcount].thread_name); + if (IS_ERR(t[tp[tcount].idx])) { + pr_info("Could not create thread!\n"); + return PTR_ERR(t[tp[tcount].idx]); + } + + ret = t[tp[tcount].idx]->pid; + wait_event_interruptible(wq[tp[tcount].idx], + thread_prepared[tp[tcount].idx] == 1); + tcount++; + break; + case START_TEST: 
+ /* arg indexes the per-thread arrays; reject out-of-range */ + if (arg >= MCOUNT) + return -EINVAL; + thread_running[arg] = 1; + wake_up_interruptible(&wq[arg]); + break; + case STOP_TEST: + /* only stop a slot that holds a created thread */ + if (arg >= MCOUNT || IS_ERR_OR_NULL(t[arg])) + return -EINVAL; + thread_should_stop[arg] = 1; + wake_up_interruptible(&wq[arg]); + kthread_stop(t[arg]); + break; + default: + break; + } + + return ret; +} + +static const struct file_operations tracer_fops = { + .open = helper_open, + .unlocked_ioctl = helper_ioctl, + .release = helper_release, +}; + +static struct miscdevice helper_dev = { + .minor = HELPER_MINOR, + .name = "helper", + .fops = &tracer_fops, +}; + +static int __init tracer_helper_init(void) +{ + int rc, i; + + for (i = 0; i < MCOUNT; i++) + init_waitqueue_head(&wq[i]); + + rc = misc_register(&helper_dev); + if (rc < 0) { + pr_err("misc_register: fail\n"); + return rc; + } +#ifdef DEBUG + pr_info("tracer-helper: init\n"); +#endif + return 0; +} + +static void __exit tracer_helper_exit(void) +{ + misc_deregister(&helper_dev); +#ifdef DEBUG + pr_info("tracer-helper: exit\n"); +#endif +} + +MODULE_AUTHOR("Daniel Baluta"); +MODULE_LICENSE("GPL"); + +module_init(tracer_helper_init); +module_exit(tracer_helper_exit); diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/Makefile b/tools/labs/templates/assignments/1-tracer/checker/_test/Makefile new file mode 100644 index 00000000000000..00160bc9b3647c --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/Makefile @@ -0,0 +1,10 @@ +#CPPFLAGS = -DDEBUG -DLOG_LEVEL=LOG_DEBUG +CFLAGS = -Wall -g -m32 + +.PHONY: all clean + +all: tracer_test.o + +clean: + -rm -f *~ + -rm -f tracer_test.o diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/debug.h b/tools/labs/templates/assignments/1-tracer/checker/_test/debug.h new file mode 100644 index 00000000000000..debdeccf492038 --- /dev/null +++ 
b/tools/labs/templates/assignments/1-tracer/checker/_test/debug.h @@ -0,0 +1,77 @@ +/* + * debugging macros + * heavily inspired 
by previous work and Internet resources + * + * uses C99 variadic macros + * uses non-standard usage of the token-paste operator (##) for + * removing the comma symbol (,) when not followed by a token + * uses non-standard __FUNCTION__ macro (MSVC doesn't support __func__) + * tested on gcc 4.4.5 and Visual Studio 2008 (9.0), compiler version 15.00 + * + * 2011, Razvan Deaconescu, razvan.deaconescu@cs.pub.ro + */ + +#ifndef DEBUG_H_ +#define DEBUG_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* log levels */ +enum { + LOG_EMERG = 1, + LOG_ALERT, + LOG_CRIT, + LOG_ERR, + LOG_WARNING, + LOG_NOTICE, + LOG_INFO, + LOG_DEBUG +}; + +/* + * initialize default loglevel (for dlog) + * may be redefined in the including code + */ + +#ifndef LOG_LEVEL +#define LOG_LEVEL LOG_WARNING +#endif + +/* + * define DEBUG macro as a compiler option: + * -DDEBUG for GCC + * /DDEBUG for MSVC + */ + +#if defined DEBUG +#define dprintf(format, ...) \ + fprintf(stderr, " [%s(), %s:%u] " format, \ + __func__, __FILE__, __LINE__, \ + ##__VA_ARGS__) +#else +#define dprintf(format, ...) \ + do { \ + } while (0) +#endif + +#if defined DEBUG +#define dlog(level, format, ...) \ + do { \ + if (level <= LOG_LEVEL) \ + dprintf(format, ##__VA_ARGS__); \ + } while (0) +#else +#define dlog(level, format, ...) 
\ + do { \ + } while (0) +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/helper.h b/tools/labs/templates/assignments/1-tracer/checker/_test/helper.h new file mode 100644 index 00000000000000..3cb30e03c9dcb9 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/helper.h @@ -0,0 +1,35 @@ +#ifndef _HELPER__ +#define _HELPER__ + +#include + +#define NAMESIZE 64 +#define MCOUNT 128 + +#define PREPARE_TEST _IOW('t', 19, unsigned int) +#define START_TEST _IOW('t', 20, unsigned int) +#define STOP_TEST _IOW('t', 21, unsigned int) + +/*XXX match test_params with tracers_stats + * perhaps use the same struct + */ +struct test_params { + pid_t pid; + char thread_name[NAMESIZE]; + int idx; /* index for multi-kthreaded test */ + /* + * kcalls: 5 + * alloc : [1024] [8] [128] [10] [128] + * free : [0] [0] [1] [0] [1] + */ + int kcalls; /* number of kmalloc calls */ + int alloc[MCOUNT]; /* sizes of kmalloc allocations */ + int free[MCOUNT]; /* intmap for which allocations to free */ + int sched; + int up; + int down; + int lock; + int unlock; +}; + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/test.h b/tools/labs/templates/assignments/1-tracer/checker/_test/test.h new file mode 100644 index 00000000000000..49ba56f0eeb525 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/test.h @@ -0,0 +1,63 @@ +/* + * generic test suite + * + * test macros and headers + */ + +#ifndef TEST_H_ +#define TEST_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* to be defined by calling program */ +extern int max_points; + +/* + * uncommend EXIT_IF_FAIL macro in order to stop test execution + * at first failed test + */ + +/*#define EXIT_IF_FAIL 1*/ + +#if defined(EXIT_IF_FAIL) +#define test_do_fail(points) \ + do { \ + printf("failed\n"); \ + exit(EXIT_FAILURE); \ + } while (0) +#else +#define test_do_fail(points) \ + printf("failed [ 
0/%3d]\n", max_points) +#endif + +#define test_do_pass(points) \ + printf("passed [%3d/%3d]\n", points, max_points) + +#define test(message, test, points) \ + do { \ + size_t _i; \ + int t = (test); \ + \ + printf("%s", message); \ + fflush(stdout); \ + \ + for (_i = strlen(message); _i < 60; _i++) \ + putchar('.'); \ + \ + if (!t) \ + test_do_fail(points); \ + else \ + test_do_pass(points); \ + \ + fflush(stdout); \ + } while (0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/tracer.h b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer.h new file mode 100644 index 00000000000000..7a55257b770aaf --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer.h @@ -0,0 +1,21 @@ +/* + * SO2 kprobe based tracer header file + * + * this is shared with user space + */ + +#ifndef TRACER_H__ +#define TRACER_H__ 1 + +#include +#ifndef __KERNEL__ +#include +#endif /* __KERNEL__ */ + +#define TRACER_DEV_MINOR 42 +#define TRACER_DEV_NAME "tracer" + +#define TRACER_ADD_PROCESS _IOW(_IOC_WRITE, 42, pid_t) +#define TRACER_REMOVE_PROCESS _IOW(_IOC_WRITE, 43, pid_t) + +#endif /* TRACER_H__ */ diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.c b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.c new file mode 100644 index 00000000000000..175ebfa07caff5 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.c @@ -0,0 +1,779 @@ +/* + * SO2 Kprobe based tracer - test suite + * + * Authors: + * Daniel Baluta + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "test.h" +#include "debug.h" +#include "util.h" + +#include "tracer.h" +#include "tracer_test.h" +#include "helper.h" + +/* use this to enable stats debugging */ +#if 0 +#define DEBUG +#endif + +#define MSECS 1000 + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +struct 
tracer_stats { + pid_t tr_pid; + int tr_alloc; + int tr_free; + int tr_mem; + int tr_mem_free; + int tr_sched; + int tr_up; + int tr_down; + int tr_lock; + int tr_unlock; +}; + +struct test_case { + char test_name[NAMESIZE]; + int score; + struct test_params test_params; +}; + +struct tracer_stats ts[MCOUNT]; + +struct test_case tc[] = { + /* 0 */ + { + .test_name = "test_simple_kmalloc", + .test_params = { + .thread_name = "xthread-0", + .kcalls = 1, + .alloc = {1024, }, + .idx = 0, + }, + .score = 5, + }, + /* 1 */ + { + .test_name = "test_simple_kfree", + .test_params = { + .thread_name = "xthread-1", + .kcalls = 1, + .alloc = {4096, }, + .free = {1, }, + .idx = 1, + }, + .score = 5, + }, + /* 2 */ + { + .test_name = "test_simple_sched", + .test_params = { + .thread_name = "xthread-2", + .sched = 1, + .idx = 2, + }, + .score = 4, + }, + /* 3 */ + { + .test_name = "test_simple_up_down", + .test_params = { + .thread_name = "xthread-3", + .up = 1, + .down = 1, + .idx = 3, + }, + .score = 4, + }, + /* 4 */ + { + .test_name = "test_simple_lock_unlock", + .test_params = { + .thread_name = "xthread-4", + .lock = 1, + .unlock = 1, + .idx = 4, + }, + .score = 4, + }, + + /* 5 */ + { + .test_name = "test_medium_kmalloc", + .test_params = { + .thread_name = "xthread-5", + .kcalls = 16, + .alloc = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + .idx = 5, + }, + .score = 5, + }, + + /* 6 */ + { + .test_name = "test_medium_free", + .test_params = { + .thread_name = "xthread-6", + .kcalls = 12, + .alloc = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048}, + .free = {0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1}, + .idx = 6, + }, + .score = 5, + }, + + /* 7 */ + { + .test_name = "test_medium_sched", + .test_params = { + .thread_name = "xthread-7", + .sched = 30, + .idx = 7, + }, + .score = 5, + }, + + /* 8 */ + { + .test_name = "test_medium_up_down", + .test_params = { + .thread_name = "xthread-8", + .up = 32, + .down = 32, + .idx = 8, + }, + .score = 4, + }, + /* 9 */ + { + 
.test_name = "test_medium_lock_unlock", + .test_params = { + .thread_name = "xthread-9", + .lock = 32, + .unlock = 32, + .idx = 9, + }, + .score = 4, + }, + /* 10 */ + { + .test_name = "test_medium_combined", + .test_params = { + .thread_name = "xthread-9", + .kcalls = 9, + .alloc = {1024, 512, 128, 64, 32, 64, 128, 512, 1024}, + .free = {1, 1, 1, 1, 1, 1, 1, 1, 1}, + .lock = 8, + .unlock = 8, + .up = 12, + .down = 12, + .idx = 10, + }, + .score = 5, + }, +}; + +/* declared in test.h; used for printing information in test macro */ +int max_points = 100; + +/* + * Do initialization for tracer test functions. + */ + +static void init_test(void) +{ + int rc; + + rc = system("insmod " MODULE_FILENAME); + DIE(rc != 0, "init_test"); +} + +static void init_test2(int *fd) +{ + int rc; + + system("insmod " MODULE_FILENAME); + + rc = open("/dev/tracer", O_RDONLY); + DIE(rc < 0, "init_test2"); + + *fd = rc; +} + +/* + * Do cleanup for tracer test functions. + */ + +static void cleanup_test(void) +{ + system("rmmod " MODULE_NAME); +} + +static void cleanup_test2(int fd) +{ + close(fd); + + system("rmmod " MODULE_NAME); +} + +/* + * Do initialization for tracer helper test module + */ +static void init_helper(int *fd) +{ + int rc; + + system("insmod " HELPER_MODULE_FILENAME); + + rc = open("/dev/helper", O_RDONLY); + DIE(rc < 0, "init helper"); + + *fd = rc; +} + +/* + * Do cleanup for tracer helper test module + */ + +static void cleanup_helper(int fd) +{ + close(fd); + + system("rmmod " HELPER_MODULE_NAME); +} + + +/* + * Check for successful module insertion and removal from the kernel. 
+ */ + +static void test_insmod_rmmod(void) +{ + int rc; + + rc = system("insmod " MODULE_FILENAME); + test("test_insmod", rc == 0, 1); + + rc = system("rmmod " MODULE_NAME); + test("test_rmmod", rc == 0, 1); + + rc = system("insmod " MODULE_FILENAME); + test(__func__, rc == 0, 1); + + system("rmmod " MODULE_NAME); +} + +static void test_open_dev_tracer(void) +{ + int rc; + char dev_name[64]; + + init_test(); + snprintf(dev_name, 63, "/dev/%s", TRACER_DEV_NAME); + + rc = open(dev_name, O_RDONLY); + test(__func__, rc >= 0, 1); + close(rc); + + cleanup_test(); +} + +static void test_dev_minor_major(void) +{ + int rc; + struct stat buf; + + init_test(); + + rc = lstat("/dev/tracer", &buf); + if (rc < 0) { + perror("lstat"); + exit(-1); + } + test(__func__, major(buf.st_rdev) == 10 && + minor(buf.st_rdev) == 42, 1); + + cleanup_test(); +} + +/* + * Check for proc entry for kprobe stats + */ + +static void test_proc_entry_exists_after_insmod(void) +{ + int rc; + + init_test(); + + rc = system("ls /proc/tracer > /dev/null 2>&1"); + test(__func__, rc == 0, 2); + + cleanup_test(); +} + +static void test_proc_entry_inexistent_after_rmmod(void) +{ + int rc; + + init_test(); + cleanup_test(); + + rc = system("ls /proc/tracer > /dev/null 2>&1"); + test(__func__, rc != 0, 2); +} + +int tracer_proc_check_values(struct tracer_stats *st, + struct test_case *tc, int no) +{ + int idx, idz, idk;/* really? 
*/ + int a, b, c, d, e, f, g, h, i, j;/* no, no */ + int total_mem = 0; + int total_free = 0; + int no_free = 0; + int ok = 0; + /* this is embarassing - O(n^2) - stats are not sorted by pid */ + + for (idx = 0; idx < no; idx++) { + ok = 0; + for (idk = 0; idk < no; idk++) { + if (st[idk].tr_pid != tc[idx].test_params.pid) + continue; + ok = 1; + total_mem = 0; + total_free = 0; + no_free = 0; + + for (idz = 0; idz < tc[idx].test_params.kcalls; idz++) { + total_mem += tc[idx].test_params.alloc[idz]; + total_free += tc[idx].test_params.free[idz] * + tc[idx].test_params.alloc[idz]; + if (tc[idx].test_params.free[idz]) + no_free++; + } + + a = (st[idk].tr_pid == tc[idx].test_params.pid); + b = (st[idk].tr_alloc == tc[idx].test_params.kcalls); + dprintf("tr_alloc (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_alloc, + tc[idx].test_params.kcalls); + + c = (st[idk].tr_free == no_free); + dprintf("tr_free (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_free, no_free); + + d = (st[idk].tr_mem == total_mem); + dprintf("tr_mem (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_mem, total_mem); + + e = (st[idk].tr_mem_free == total_free); + dprintf("tr_free (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_mem_free, + total_free); + + f = (st[idk].tr_sched >= tc[idx].test_params.sched); + dprintf("tr_sched (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_sched, + tc[idx].test_params.sched); + + g = (st[idk].tr_up == tc[idx].test_params.up); + dprintf("tr_up (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_up, + tc[idx].test_params.up); + + h = (st[idk].tr_down == tc[idx].test_params.down); + dprintf("tr_down (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_down, + tc[idx].test_params.down); + + i = (st[idk].tr_lock == tc[idx].test_params.lock); + dprintf("tr_lock (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_lock, + tc[idx].test_params.lock); + + j = (st[idk].tr_unlock == 
tc[idx].test_params.unlock); + dprintf("tr_unlock (%d): got %d, expected %d\n", + st[idk].tr_pid, st[idk].tr_unlock, + tc[idx].test_params.unlock); + + if (!a || !b || !c || !d || + !e || !f || !g || !h || !i || !j) + return 0; + } + } + return ok; +} + +static void tracer_proc_read_values(struct tracer_stats *st, int no) +{ + char buffer[256]; + FILE *f; + int i; + + f = fopen("/proc/tracer", "rt"); + DIE(f == NULL, "tracer_proc_read_value"); + + /* skip header line */ + fgets(buffer, 256, f); + + for (i = 0; i < no; i++) { + fscanf(f, "%d %d %d %d %d %d %d %d %d %d", + &st[i].tr_pid, &st[i].tr_alloc, &st[i].tr_free, + &st[i].tr_mem, &st[i].tr_mem_free, &st[i].tr_sched, + &st[i].tr_up, &st[i].tr_down, &st[i].tr_lock, + &st[i].tr_unlock); + } + fclose(f); +} + +/* + * creates a process prepared to run with @tp params + * returns the pid of the newly created process + */ +void prepare_helper(int fd, struct test_params *tp, pid_t *pid) +{ + int rc; + + rc = ioctl(fd, PREPARE_TEST, tp); + DIE(rc < 0, "prepare helper"); + *pid = rc; +} + +void start_helper(int fd, int idx) +{ + int rc; + + rc = ioctl(fd, START_TEST, idx); + DIE(rc < 0, "start helper"); +} + +void stop_helper(int fd, int idx) +{ + int rc; + + rc = ioctl(fd, STOP_TEST, idx); + DIE(rc < 0, "stop helper"); +} + +/* XXX: we should really check the return codes */ +void trace_process(int fd, pid_t pid) +{ + int rc; + + rc = ioctl(fd, TRACER_ADD_PROCESS, pid); + DIE(rc < 0, "trace_process"); +} + +void untrace_process(int fd, pid_t pid) +{ + int rc; + + rc = ioctl(fd, TRACER_REMOVE_PROCESS, pid); + DIE(rc < 0, "untrace process"); +} +static void test_single(void) +{ + int fd, fdh, i, rc; + + init_test2(&fd); + init_helper(&fdh); + + for (i = 0; i < 11; i++) { + + prepare_helper(fdh, &tc[i].test_params, &tc[i].test_params.pid); + usleep(400 * MSECS); + + trace_process(fd, tc[i].test_params.pid); + usleep(400 * MSECS); + + start_helper(fdh, tc[i].test_params.idx); + usleep(400 * MSECS); + + /* check proc for 
schedule stats */ + tracer_proc_read_values(&ts[0], 1); + rc = tracer_proc_check_values(&ts[0], &tc[i], 1); + + memset(&ts[0], 0, sizeof(struct tracer_stats)); + + untrace_process(fd, tc[i].test_params.pid); + usleep(400 * MSECS); + stop_helper(fdh, tc[i].test_params.idx); + + usleep(400 * MSECS); + test(tc[i].test_name, rc == 1, tc[i].score); + } + + cleanup_helper(fdh); + cleanup_test2(fd); +} + + +static void test_multiple_zero_stats(void) +{ + int fd, fdh, i, rc; + struct test_case mz[16]; + struct tracer_stats zstats[16]; /* zstats, mz, wtf? */ + + for (i = 0; i < 16; i++) { + memset(&mz[i], 0, sizeof(struct test_case)); + snprintf(mz[i].test_params.thread_name, 16, "xthread-%d", i); + mz[i].test_params.idx = i; + } + + init_test2(&fd); + init_helper(&fdh); + + for (i = 0; i < 16; i++) { + prepare_helper(fdh, &mz[i].test_params, &mz[i].test_params.pid); + + trace_process(fd, mz[i].test_params.pid); + } + + usleep(400 * MSECS); + for (i = 0; i < 16; i++) + start_helper(fdh, mz[i].test_params.idx); + + usleep(400 * MSECS); + + /* check proc for schedule stats */ + + tracer_proc_read_values(&zstats[0], 16); + rc = tracer_proc_check_values(&zstats[0], &mz[0], 16); + + for (i = 0; i < 16; i++) + untrace_process(fd, mz[i].test_params.pid); + + for (i = 0; i < 16; i++) + stop_helper(fdh, mz[i].test_params.idx); + usleep(400 * MSECS); + + test("test_multiple_zero_stats", rc == 1, 5); + + cleanup_helper(fdh); + cleanup_test2(fd); +} + +/* + * FIXME: duplicate code + */ +static void test_multiple_nonzero_stats(void) +{ + int fd, fdh, i, rc; + struct test_case mz[16]; + struct tracer_stats zstats[16]; /* zstats, mz, wtf? 
*/ + + for (i = 0; i < 16; i++) { + memset(&mz[i], 0, sizeof(struct test_case)); + snprintf(mz[i].test_params.thread_name, 16, "xthread-%d", i); + mz[i].test_params.up = i; + mz[i].test_params.down = i; + mz[i].test_params.sched = i; + mz[i].test_params.lock = i; + mz[i].test_params.unlock = i; + mz[i].test_params.idx = i; + } + + init_test2(&fd); + init_helper(&fdh); + + for (i = 0; i < 16; i++) { + prepare_helper(fdh, &mz[i].test_params, &mz[i].test_params.pid); + + trace_process(fd, mz[i].test_params.pid); + } + + usleep(400 * MSECS); + for (i = 0; i < 16; i++) + start_helper(fdh, mz[i].test_params.idx); + + usleep(400 * MSECS); + + /* check proc for schedule stats */ + + tracer_proc_read_values(&zstats[0], 16); + rc = tracer_proc_check_values(&zstats[0], &mz[0], 16); + + for (i = 0; i < 16; i++) + untrace_process(fd, mz[i].test_params.pid); + + for (i = 0; i < 16; i++) + stop_helper(fdh, mz[i].test_params.idx); + usleep(400 * MSECS); + + test("test_multiple_nonzero_stats", rc == 1, 12); + + cleanup_helper(fdh); + cleanup_test2(fd); +} + +/* + * FIXME: duplicate code + */ +static void test_decent_alloc_free(void) +{ + int fd, fdh, i, rc, j; + struct test_case mz[32]; + struct tracer_stats zstats[32]; /* zstats, mz, wtf? 
*/ + + for (i = 0; i < 32; i++) { + memset(&mz[i], 0, sizeof(struct test_case)); + snprintf(mz[i].test_params.thread_name, 16, "xthread-%d", i); + mz[i].test_params.kcalls = 32; + for (j = 0; j < 32; j++) { + mz[i].test_params.alloc[j] = 8 * j * (i+1); + mz[i].test_params.free[j] = 1; + } + + mz[i].test_params.idx = i; + } + + init_test2(&fd); + init_helper(&fdh); + + for (i = 0; i < 32; i++) { + prepare_helper(fdh, &mz[i].test_params, &mz[i].test_params.pid); + + trace_process(fd, mz[i].test_params.pid); + } + + usleep(800 * MSECS); + for (i = 0; i < 32; i++) + start_helper(fdh, mz[i].test_params.idx); + + usleep(800 * MSECS); + + /* check proc for schedule stats */ + + tracer_proc_read_values(&zstats[0], 32); + rc = tracer_proc_check_values(&zstats[0], &mz[0], 32); + + for (i = 0; i < 32; i++) + untrace_process(fd, mz[i].test_params.pid); + + for (i = 0; i < 32; i++) + stop_helper(fdh, mz[i].test_params.idx); + usleep(800 * MSECS); + + test("test_decent_alloc_free", rc == 1, 12); + + cleanup_helper(fdh); + cleanup_test2(fd); +} + +/* + * FIXME: duplicate code + */ +static void test_mini_stress(void) +{ + int fd, fdh, i, rc; + struct test_case mz[32]; + struct tracer_stats zstats[32]; /* zstats, mz, wtf? 
*/ + + for (i = 0; i < 32; i++) { + memset(&mz[i], 0, sizeof(struct test_case)); + snprintf(mz[i].test_params.thread_name, 16, "xthread-%d", i); + mz[i].test_params.up = 512 + i; + mz[i].test_params.down = 512 + i; + mz[i].test_params.sched = i; + mz[i].test_params.lock = 128 + i; + mz[i].test_params.unlock = 128 + i; + mz[i].test_params.idx = i; + } + + init_test2(&fd); + init_helper(&fdh); + + for (i = 0; i < 32; i++) { + prepare_helper(fdh, &mz[i].test_params, &mz[i].test_params.pid); + + trace_process(fd, mz[i].test_params.pid); + } + + usleep(800 * MSECS); + for (i = 0; i < 32; i++) + start_helper(fdh, mz[i].test_params.idx); + + usleep(800 * MSECS); + + /* check proc for schedule stats */ + + tracer_proc_read_values(&zstats[0], 32); + rc = tracer_proc_check_values(&zstats[0], &mz[0], 32); + + for (i = 0; i < 32; i++) + untrace_process(fd, mz[i].test_params.pid); + + for (i = 0; i < 32; i++) + stop_helper(fdh, mz[i].test_params.idx); + usleep(800 * MSECS); + + test("test_mini_stress", rc == 1, 12); + + cleanup_helper(fdh); + cleanup_test2(fd); +} + + + + +static void (*test_fun_array[])(void) = { + NULL, + test_insmod_rmmod, + test_open_dev_tracer, + test_dev_minor_major, + test_proc_entry_exists_after_insmod, + test_proc_entry_inexistent_after_rmmod, + test_single, + test_multiple_zero_stats, + test_multiple_nonzero_stats, + test_decent_alloc_free, + test_mini_stress, +}; + +/* + * Usage message for invalid executable call. 
+ */ + +static void usage(const char *argv0) +{ + fprintf(stderr, "Usage: %s test_no\n\n", argv0); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + int test_idx; + + if (argc != 2) + usage(argv[0]); + + test_idx = atoi(argv[1]); + + if (test_idx < 1 || + test_idx >= ARRAY_SIZE(test_fun_array)) { + fprintf(stderr, "Error: test index %d is out of bounds\n", + test_idx); + exit(EXIT_FAILURE); + } + + srand(time(NULL)); + srand48(time(NULL)); + test_fun_array[test_idx](); + + return 0; +} diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.h b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.h new file mode 100644 index 00000000000000..ac189824743ee0 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/tracer_test.h @@ -0,0 +1,26 @@ +/* + * SO2 Kprobe based tracer - test suite specific header + * + * Authors: + * Daniel Baluta + */ + +#ifndef TRACER_TEST_H_ +#define TRACER_TEST_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* tracer test suite macros and structures */ +#define MODULE_NAME "tracer" +#define MODULE_FILENAME MODULE_NAME ".ko" + +#define HELPER_MODULE_NAME "tracer_helper" +#define HELPER_MODULE_FILENAME HELPER_MODULE_NAME ".ko" + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/checker/_test/util.h b/tools/labs/templates/assignments/1-tracer/checker/_test/util.h new file mode 100644 index 00000000000000..72eb85e8200563 --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/checker/_test/util.h @@ -0,0 +1,71 @@ +/* + * useful structures/macros + * + * Operating Systems 2 + */ + +#ifndef UTIL_H_ +#define UTIL_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#if defined(_WIN32) + +#include + +static VOID PrintLastError(const PCHAR message) +{ + CHAR errBuff[1024]; + + FormatMessage( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, + GetLastError(), + 0, + errBuff, + 
sizeof(errBuff) - 1, + NULL); + + fprintf(stderr, "%s: %s\n", message, errBuff); +} + +#define ERR(call_description) \ + do { \ + fprintf(stderr, "(%s, %d): ", \ + __FILE__, __LINE__); \ + PrintLastError(call_description); \ + } while (0) + +#elif defined(__linux__) + +/* error printing macro */ +#define ERR(call_description) \ + do { \ + fprintf(stderr, "(%s, %d): ", \ + __FILE__, __LINE__); \ + perror(call_description); \ + } while (0) + +#else + #error "Unknown platform" +#endif + +/* print error (call ERR) and exit */ +#define DIE(assertion, call_description) \ + do { \ + if (assertion) { \ + ERR(call_description); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/1-tracer/tracer.h b/tools/labs/templates/assignments/1-tracer/tracer.h new file mode 100644 index 00000000000000..7a55257b770aaf --- /dev/null +++ b/tools/labs/templates/assignments/1-tracer/tracer.h @@ -0,0 +1,21 @@ +/* + * SO2 kprobe based tracer header file + * + * this is shared with user space + */ + +#ifndef TRACER_H__ +#define TRACER_H__ 1 + +#include +#ifndef __KERNEL__ +#include +#endif /* __KERNEL__ */ + +#define TRACER_DEV_MINOR 42 +#define TRACER_DEV_NAME "tracer" + +#define TRACER_ADD_PROCESS _IOW(_IOC_WRITE, 42, pid_t) +#define TRACER_REMOVE_PROCESS _IOW(_IOC_WRITE, 43, pid_t) + +#endif /* TRACER_H_ */ diff --git a/tools/labs/templates/assignments/2-uart/checker/Makefile b/tools/labs/templates/assignments/2-uart/checker/Makefile new file mode 100644 index 00000000000000..9cb87cb293e9cc --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/checker/Makefile @@ -0,0 +1,14 @@ +CFLAGS = -Wall -g -static -m32 + +.PHONY: all run clean + +all: test solution.ko + +test: _test/test.o + $(CC) $(CFLAGS) -o $@ $^ + +solution.ko: _test/solution.ko + ln -s $< $@ + +clean: + -rm -f *~ test _test/test.o solution.ko diff --git a/tools/labs/templates/assignments/2-uart/checker/README 
b/tools/labs/templates/assignments/2-uart/checker/README new file mode 100644 index 00000000000000..67ae7234cd192c --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/checker/README @@ -0,0 +1,38 @@ += UART16550 TEST SUITE == + +Test suite for UART16550 + +== FILES == + +README + * this file + +Makefile + * Makefile to build the test suite executable + +_checker + * script to run all tests defined in _test/test.c + +_test/test.c + * test suite for UART16550 + +_test/solution.ko + * kernel module implementing UART16550, + used to transmit/receive data to/from your kernel module + +== BUILDING == + +Use the Makefile to properly build the test executable: + + make + +== RUNNING == + +Copy your uart16550.ko module and _checker, test and solution.ko +to fsimg/root directory on your QEMU/KVM virtual machine. + +In order to run the test suite you can use the _checker script. + +The _checker script runs all tests and computes assignment grade: + + ./_checker diff --git a/tools/labs/templates/assignments/2-uart/checker/_checker b/tools/labs/templates/assignments/2-uart/checker/_checker new file mode 100755 index 00000000000000..a118d02b621c64 --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/checker/_checker @@ -0,0 +1,4 @@ +#!/bin/sh + +insmod uart16550.ko; cat /proc/modules > /dev/kmsg; rmmod uart16550 +./test diff --git a/tools/labs/templates/assignments/2-uart/checker/_test/solution.ko b/tools/labs/templates/assignments/2-uart/checker/_test/solution.ko new file mode 100644 index 00000000000000..996dd21ac572b8 Binary files /dev/null and b/tools/labs/templates/assignments/2-uart/checker/_test/solution.ko differ diff --git a/tools/labs/templates/assignments/2-uart/checker/_test/test.c b/tools/labs/templates/assignments/2-uart/checker/_test/test.c new file mode 100644 index 00000000000000..6e87f7055daac8 --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/checker/_test/test.c @@ -0,0 +1,593 @@ +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "uart16550.h" + +#define UART16550_MAJOR 42 +#define COM1_MAJOR 42 +#define COM2_MAJOR 42 +#define STR(x) #x +#define XSTR(x) STR(x) +#define OPTION_COM1_ONLY 1 +#define OPTION_COM2_ONLY 2 +#define OPTION_BOTH 3 + +#define MODULE_NAME "uart16550" +#define SOLUTION_NAME "solution" + +#define PAD_CHARS 60 + +#define UART0 "/dev/uart0" +#define UART1 "/dev/uart1" +#define UART10 "/dev/uart10" + +#define INFILE "testfile.in" +#define OUTFILE "testfile.out" + +#define fail(s) do { \ + printf("%s:%d: ", __func__, __LINE__); \ + fflush(stdout); \ + perror(s); \ + exit(EXIT_FAILURE); \ + } while (0) + + +#define test(d, v, e, p) do_test((d), (v), (e), 0, 0, (p)) +#define not_test(d, v, e, p) do_test((d), (v), (e), 1, 0, (p)) +#define fatal_test(d, v, e,p) do_test((d), (v), (e), 0, 1, (p)) + +#define GENERIC_TEST_TIMEOUT 3 +const int total = 92; + +void sig_handler(int signum) { + fprintf(stderr, "Child process pid=%d of checker (that issues read/write syscalls to the driver) got killed after TIMEOUT=%ds\n", getpid(), GENERIC_TEST_TIMEOUT); + fprintf(stderr, "\tThis might be because you didn't implement read/write or there is a bug in the implementation\n"); + exit(EXIT_FAILURE); +} + +/* + * if the test passes it will return 0 + * if it fails it returns the number of points given as argument + */ +static float +do_test(const char *description, int value, int expected, int negate, int fatal, float points) +{ + int num_chars; + + num_chars = printf("%s", description); + for (; num_chars < PAD_CHARS - strlen("passed"); num_chars++) + putchar('.'); + fflush(stdout); + if (!negate) { + if (value == expected) { + printf("passed [%.1f/%d]\n", points, total); + fflush(stdout); + return 0; + } else { + printf("failed [0/%d]\n", total); + fflush(stdout); + if (fatal) + exit(EXIT_FAILURE); + } + } else { + if (value != expected) { + printf("passed [%.1f/%d]\n", points, total); + 
fflush(stdout); + return 0; + } else { + printf("failed [0/%d]\n", total); + fflush(stdout); + if (fatal) + exit(EXIT_FAILURE); + } + } + return points; +} + +static void +test_title(const char *title) +{ + int len = strlen(title); + int pad = (PAD_CHARS - len) / 2 - 1; + int mod = (PAD_CHARS - len) % 2; + int i; + + assert(pad >= 1); + putchar('\n'); + for (i = 0; i < pad; i++) + putchar('='); + printf(" %s ", title); + for (i = 0; i < pad + mod; i++) + putchar('='); + putchar('\n'); +} + +static void +make_nodes(void) +{ + mknod(UART0, S_IFCHR, COM1_MAJOR<<8); + mknod(UART1, S_IFCHR, (COM2_MAJOR<<8) + 1); + mknod(UART10, S_IFCHR, (UART16550_MAJOR<<8)+10); +} + +static void +remove_nodes(void) +{ + unlink(UART0); + unlink(UART1); + unlink(UART10); +} + +static float +test1(void) +{ + float total = 16; + + test_title("Test 1. Module insertion and removal"); + + /* Insert module with default params and test. */ + total -= fatal_test("insmod " MODULE_NAME ", default options", + system("insmod " MODULE_NAME ".ko"), 0, 1); + total -= test("major", + system("cat /proc/devices | grep '" XSTR(COM1_MAJOR) " " MODULE_NAME "' >/dev/null 2>&1"), + 0, 1); + total -= test("ioports COM1", + system("cat /proc/ioports | grep '03f8-03ff : " MODULE_NAME "' > /dev/null 2>&1"), + 0, 1); + total -= test("ioports COM2", + system("cat /proc/ioports | grep '02f8-02ff : " MODULE_NAME "' > /dev/null 2>&1"), + 0, 1); + total -= test("interrupts COM1", + system("cat /proc/interrupts | grep '4:.*" MODULE_NAME "' > /dev/null 2>&1"), + 0, 1); + total -= test("interrupts COM2", + system("cat /proc/interrupts | grep '3:.*" MODULE_NAME "' > /dev/null 2>&1"), + 0, 1); + total -= test("rmmod", system("rmmod " MODULE_NAME), 0, 0.5); + + /* Insert module with different major. 
*/ + total -= fatal_test("insmod " MODULE_NAME ", major=" XSTR(COM2_MAJOR), + system("insmod " MODULE_NAME ".ko major=" XSTR(COM2_MAJOR)), 0, 1); + total -= test("major", + system("cat /proc/devices | grep '" XSTR(COM2_MAJOR) " " MODULE_NAME "' >/dev/null 2>&1"), + 0, 1); + total -= test("rmmod", system("rmmod " MODULE_NAME), 0, 0.5); + + /* Insert module only for COM2, check that it works side by side + * with solution. + */ + total -= fatal_test("insmod " MODULE_NAME ", COM2 only", + system("insmod " MODULE_NAME ".ko option=" XSTR(OPTION_COM2_ONLY)), + 0, 1); + total -= fatal_test("insmod " SOLUTION_NAME ", COM1 only", + system("insmod " SOLUTION_NAME ".ko option=" XSTR(OPTION_COM1_ONLY)), + 0, 1); + total -= test("ioports COM1", + system("cat /proc/ioports | grep '03f8-03ff : " SOLUTION_NAME "' > /dev/null 2>&1"), + 0, 1); + total -= test("ioports COM2", + system("cat /proc/ioports | grep '02f8-02ff : " MODULE_NAME "' > /dev/null 2>&1"), + 0, 1); + total -= test("interrupts COM1", + system("cat /proc/interrupts | grep '4:.*" SOLUTION_NAME "' > /dev/null 2>&1"), + 0, 1); + total -= test("interrupts COM2", + system("cat /proc/interrupts | grep '3:.*" MODULE_NAME "' > /dev/null 2>&1"), + 0, 1); + total -= test("rmmod " MODULE_NAME, system("rmmod " MODULE_NAME), 0, 0.5); + total -= test("rmmod " SOLUTION_NAME, system("rmmod " SOLUTION_NAME), 0, 0.5); + + return total; +} + +static float +test2(void) +{ + float total = 5.5; + int fd; + + test_title("Test 2. Invalid parameters"); + + /* Check ioctl sanity. 
*/ + total -= fatal_test("insmod", system("insmod " MODULE_NAME ".ko"), 0, 1); + fd = open(UART0, O_RDWR); + if (fd == -1) + fail("open " UART0); +#define ioctl_test(n) test("invalid ioctl " XSTR((n)), \ + ioctl(fd, UART16550_IOCTL_SET_LINE, (n)), -1, 1) + total -= ioctl_test(0xdeadbeef); + total -= ioctl_test(0x1337cafe); +#undef ioctl_test + total -= test("invalid ioctl wrong operation", ioctl(fd, 0xffff), -1, 1); + close(fd); + total -= test("rmmod", system("rmmod " MODULE_NAME), 0, 0.5); + + /* Check invalid module parameters. */ + total -= not_test("insmod " MODULE_NAME ", option=0xdeadbabe", + system("insmod " MODULE_NAME ".ko option=0xdeadbabe"), + 0, 1); + + return total; +} + +/* Speed sets: + * 0 -> 1200, 2400, 4800 + * 1 -> 9600, 19200, 38400, 56000 + * 2 -> 115200 + */ +static const struct { + int num; + unsigned char speed[4]; + int bufsizes[2]; /* min and max */ +} speed_sets[3] = { + { + .num = 3, + .speed = { UART16550_BAUD_1200, + UART16550_BAUD_2400, + UART16550_BAUD_4800, -1 }, + .bufsizes = { 128, 256 }, + }, + { + .num = 4, + .speed = { UART16550_BAUD_9600, + UART16550_BAUD_19200, + UART16550_BAUD_38400, + UART16550_BAUD_56000 }, + .bufsizes = { 256, 1024 }, + }, + { + .num = 1, + .speed = { UART16550_BAUD_115200, -1, -1, -1 }, + .bufsizes = { 2048, 2048 }, + }, +}; + +static void +gen_params(struct uart16550_line_info *line, int speed_set) +{ + int r; + + line->baud = speed_sets[speed_set].speed[rand() % + speed_sets[speed_set].num]; + line->len = UART16550_LEN_8; + line->stop = rand() % 2 * 4; + r = rand() % 4; + line->par = r < 2 ? 
r*8 : 0x18 + (r-2) * 8; +} + +int do_read(int fd, unsigned char *buffer, int size) +{ + int n, from = 0; + + while (1) { + n = read(fd, &buffer[from], size - from); + if (n <= 0) + return -1; + if (n + from == size) + return 0; + from += n; + } +} + +int do_write(int fd, unsigned char *buffer, unsigned int size) +{ + int n, from = 0; + + while (1) { + n = write(fd, &buffer[from], size - from); + if (n <= 0) { + perror("write"); + return -1; + } + if (n + from == size) + return 0; + from += n; + } +} + +static int +gen_test_file(char *fname, int speed_set) +{ + int size, min, max; + char comm[1024]; + + min = speed_sets[speed_set].bufsizes[0]; + max = speed_sets[speed_set].bufsizes[1]; + size = (min == max) ? min : rand() % (min - max) + min; + sprintf(comm, + "dd if=/dev/urandom of=%s bs=1 count=%d >/dev/null 2>/dev/null", + fname, + size); + if (system(comm)) + fprintf(stderr, "failed to generate random file (%s)\n", comm); + return size; +} + +static void +copy_file(int fdr, int fdw, int len) +{ +#define COPY_BUF_SIZE 128 + unsigned char buf[COPY_BUF_SIZE]; + + do { + int partial, rc; + + partial = len < COPY_BUF_SIZE ? 
len : COPY_BUF_SIZE; + if (partial == 0) + break; + rc = read(fdr, buf, partial); + if (rc == 0) + break; + if (rc == -1) + fail("read"); + len -= rc; + rc = do_write(fdw, buf, rc); + if (rc < 0) + fail("write"); + } while (1); +} + +static int +copy_test(int fd0, int fd1, int speed_set) +{ + pid_t rpid, wpid, kpid; + int len, status, fd; + int rc1, rc2, rc3, exit_status1, exit_status2, exit_status3; + int i; + + len = gen_test_file(INFILE, speed_set); + rpid = fork(); + switch (rpid) { + case 0: + fd = open(INFILE, O_RDONLY); + if (fd < 0) + fail("open " INFILE); + copy_file(fd, fd0, len); + close(fd); + exit(EXIT_SUCCESS); + break; + default: + break; + } + + wpid = fork(); + switch (wpid) { + case 0: + fd = open(OUTFILE, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + fail("open " OUTFILE); + copy_file(fd1, fd, len); + close(fd); + exit(EXIT_SUCCESS); + break; + default: + break; + } + + kpid = fork(); + switch (kpid) { + case 0: + for (i = 0; i < GENERIC_TEST_TIMEOUT; i++) { + /* + * check if procs still exist. kill with arg 0 + * will succed (ret 0) if the pid exists + */ + if (!kill(rpid, 0)) { + sleep(1); + continue; + } else if (!kill(wpid, 0)) { + sleep(1); + continue; + } else + break; + + } + kill(rpid, SIGTERM); + kill(wpid, SIGTERM); + exit(EXIT_SUCCESS); + break; + default: + break; + + } + + rc1 = waitpid(rpid, &status, 0); + exit_status1 = WEXITSTATUS(status); + + + rc2 = waitpid(wpid, &status, 0); + exit_status2 = WEXITSTATUS(status); + + rc3 = waitpid(kpid, &status, 0); + exit_status3 = WEXITSTATUS(status); + + if (rc1 < 0 || rc2 < 0 || rc3 < 0 || + exit_status1 || exit_status2 || exit_status3) + return -1; + + return system("diff " INFILE " " OUTFILE "> /dev/null 2> /dev/null"); +} + +static float +generic_test(const char *reader, const char *writer, int speed_set, + int num_tests) +{ + int fd0, fd1, i; + float total = num_tests * 1.5 + (reader != writer ? 
6 : 4) * 0.5; + char dbuf[1024], cbuf[1024]; + struct uart16550_line_info uli; + + if (reader != writer) { + sprintf(dbuf, "insmod %s", reader); + sprintf(cbuf, "insmod %s.ko option=%d", + reader, OPTION_COM2_ONLY); + total -= fatal_test(dbuf, system(cbuf), 0, 0.5); + sprintf(dbuf, "insmod %s", writer); + sprintf(cbuf, "insmod %s.ko option=%d", + writer, OPTION_COM1_ONLY); + total -= fatal_test(dbuf, system(cbuf), 0, 0.5); + } else { + sprintf(dbuf, "insmod %s", reader); + sprintf(cbuf, "insmod %s.ko", reader); + total -= fatal_test(dbuf, system(cbuf), 0, 0.5); + } + + gen_params(&uli, speed_set); + fd0 = open(UART0, O_WRONLY); + if (fd0 == -1) + fail("open " UART0); + fd1 = open(UART1, O_RDONLY); + if (fd1 == -1) + fail("open " UART1); + total -= test("ioctl reader", + ioctl(fd1, UART16550_IOCTL_SET_LINE, &uli), 0, 0.5); + total -= test("ioctl writer", + ioctl(fd0, UART16550_IOCTL_SET_LINE, &uli), 0, 0.5); + + for (i = 0; i < num_tests; i++) { + sprintf(dbuf, "test %02d", i + 1); + total -= test(dbuf, copy_test(fd0, fd1, speed_set), 0, 1.5); + } + + close(fd0); + close(fd1); + + if (reader != writer) { + sprintf(dbuf, "rmmod %s", reader); + sprintf(cbuf, "rmmod %s.ko", reader); + total -= fatal_test(dbuf, system(cbuf), 0, 0.5); + sprintf(dbuf, "rmmod %s", writer); + sprintf(cbuf, "rmmod %s.ko", writer); + total -= fatal_test(dbuf, system(cbuf), 0, 0.5); + } else { + sprintf(dbuf, "rmmod %s", reader); + sprintf(cbuf, "rmmod %s.ko", reader); + total -= fatal_test(dbuf, system(cbuf), 0, 0.5); + } + + return total; +} + +#define choose_one(rd, wr) do { \ + int r = rand() % 2; \ + if (r == 0) { \ + rd = MODULE_NAME; \ + wr = SOLUTION_NAME; \ + } else { \ + rd = SOLUTION_NAME; \ + wr = MODULE_NAME; \ + } \ + } while (0) + +static float +test3(void) +{ + const char *rd, *wr; + + rd = MODULE_NAME; + wr = SOLUTION_NAME; + test_title("Test 3. 
Read, small speed"); + return generic_test(rd, wr, 0, 5); +} + +static float +test4(void) +{ + const char *rd, *wr; + + rd = SOLUTION_NAME; + wr = MODULE_NAME; + test_title("Test 4. Write, small speed"); + return generic_test(rd, wr, 0, 5); +} + +static float +test5(void) +{ + const char *rd, *wr; + + rd = wr = MODULE_NAME; + test_title("Test 5. Back-to-back, small speed"); + return generic_test(rd, wr, 0, 5); +} + +static float +test6(void) +{ + const char *rd, *wr; + + choose_one(rd, wr); + test_title("Test 6. Read/Write, medium speed"); + return generic_test(rd, wr, 1, 5); +} + +static float +test7(void) +{ + const char *rd, *wr; + + rd = wr = MODULE_NAME; + test_title("Test 7. Back-to-back, medium speed"); + return generic_test(rd, wr, 1, 5); +} + +static float +test8(void) +{ + const char *rd, *wr; + + choose_one(rd, wr); + test_title("Test 8. Read/Write, high speed"); + return generic_test(rd, wr, 2, 5); +} + +static float +test9(void) +{ + const char *rd, *wr; + + rd = wr = MODULE_NAME; + test_title("Test 9. Back-to-back, high speed"); + return generic_test(rd, wr, 2, 5); +} + +int +main(void) +{ + float num_passed = 0; + + signal(SIGTERM, sig_handler); + srand(time(NULL)); + make_nodes(); + + num_passed += test1(); + num_passed += test2(); + num_passed += test3(); + num_passed += test4(); + num_passed += test5(); + num_passed += test6(); + num_passed += test7(); + num_passed += test8(); + num_passed += test9(); + + remove_nodes(); + unlink(INFILE); + unlink(OUTFILE); + printf("\nTotal: [%.1f/%d]\n", num_passed, total); + + return 0; +} + +/* Extra 2 lines so the file is the proper size. 
*/ diff --git a/tools/labs/templates/assignments/2-uart/checker/_test/uart16550.h b/tools/labs/templates/assignments/2-uart/checker/_test/uart16550.h new file mode 100644 index 00000000000000..73008921925769 --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/checker/_test/uart16550.h @@ -0,0 +1,46 @@ +#ifndef UART16550_H +#define UART16550_H + +#define OPTION_COM1 1 +#define OPTION_COM2 2 +#define OPTION_BOTH 3 + +#define UART16550_COM1_SELECTED 0x01 +#define UART16550_COM2_SELECTED 0x02 + +#define MAX_NUMBER_DEVICES 2 + +#ifndef _UART16550_REGS_H + +#define UART16550_BAUD_1200 96 +#define UART16550_BAUD_2400 48 +#define UART16550_BAUD_4800 24 +#define UART16550_BAUD_9600 12 +#define UART16550_BAUD_19200 6 +#define UART16550_BAUD_38400 3 +#define UART16550_BAUD_56000 2 +#define UART16550_BAUD_115200 1 + +#define UART16550_LEN_5 0x00 +#define UART16550_LEN_6 0x01 +#define UART16550_LEN_7 0x02 +#define UART16550_LEN_8 0x03 + +#define UART16550_STOP_1 0x00 +#define UART16550_STOP_2 0x04 + +#define UART16550_PAR_NONE 0x00 +#define UART16550_PAR_ODD 0x08 +#define UART16550_PAR_EVEN 0x18 +#define UART16550_PAR_STICK 0x20 + +#endif + +#define UART16550_IOCTL_SET_LINE 1 + +struct uart16550_line_info { + unsigned char baud, len, par, stop; +}; + +#endif + diff --git a/tools/labs/templates/assignments/2-uart/uart16550.h b/tools/labs/templates/assignments/2-uart/uart16550.h new file mode 100644 index 00000000000000..e47e82945404a4 --- /dev/null +++ b/tools/labs/templates/assignments/2-uart/uart16550.h @@ -0,0 +1,48 @@ +#ifndef _UART16550_H +#define _UART16550_H + +#define OPTION_COM1 1 +#define OPTION_COM2 2 +#define OPTION_BOTH 3 + +#define UART16550_COM1_SELECTED 0x01 +#define UART16550_COM2_SELECTED 0x02 + +#define MAX_NUMBER_DEVICES 2 + +#ifndef _UART16550_REGS_H + + + +#define UART16550_BAUD_1200 96 +#define UART16550_BAUD_2400 48 +#define UART16550_BAUD_4800 24 +#define UART16550_BAUD_9600 12 +#define UART16550_BAUD_19200 6 +#define UART16550_BAUD_38400 3 
+#define UART16550_BAUD_56000 2 +#define UART16550_BAUD_115200 1 + +#define UART16550_LEN_5 0x00 +#define UART16550_LEN_6 0x01 +#define UART16550_LEN_7 0x02 +#define UART16550_LEN_8 0x03 + +#define UART16550_STOP_1 0x00 +#define UART16550_STOP_2 0x04 + +#define UART16550_PAR_NONE 0x00 +#define UART16550_PAR_ODD 0x08 +#define UART16550_PAR_EVEN 0x18 +#define UART16550_PAR_STICK 0x20 + +#endif + +#define UART16550_IOCTL_SET_LINE 1 + +struct uart16550_line_info { + unsigned char baud, len, par, stop; +}; + + +#endif diff --git a/tools/labs/templates/assignments/3-raid/Kbuild b/tools/labs/templates/assignments/3-raid/Kbuild new file mode 100644 index 00000000000000..98b113c88dccfa --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/Kbuild @@ -0,0 +1,2 @@ +EXTRA_CFLAGS = -Wall -Wno-unused-function -g +obj-m = ssr.o diff --git a/tools/labs/templates/assignments/3-raid/checker/Makefile b/tools/labs/templates/assignments/3-raid/checker/Makefile new file mode 100644 index 00000000000000..9eea19fbf00cbb --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/Makefile @@ -0,0 +1,14 @@ +CFLAGS = -Wall -Wextra -g -m32 +LDFLAGS = -static -m32 + +.PHONY: all clean + +all: + +test: + make -C _test/ + ln -sf _test/run-test run-test + +clean: + -make -C _test/ clean + rm -rf run-test diff --git a/tools/labs/templates/assignments/3-raid/checker/README b/tools/labs/templates/assignments/3-raid/checker/README new file mode 100644 index 00000000000000..8fb41fd9b25275 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/README @@ -0,0 +1,40 @@ +== SOFTWARE RAID TEST SUITE == + +Test suite for software RAID + +== FILES == + +README + * this file + +Makefile + * Makefile for automating the build process + +_checker + * script to run all tests defined in _test/test.c + +_test/test.c + * test suite for software RAID + +== RUNNING == + +In order to run the test suite you can either use the _checker +script or run the run-test executable. 
+ +The kernel module must be named ssr.ko and must be in the current folder. + +The run-test executable has to be in the current folder. You can create +a link using: + + ln -sf _test/run-test run-test + +The _checker script runs all tests and computes assignment grade. You +can use any of the two commands below. + + make test + ./_checker + +In order to run a specific test, pass the test number (1 .. 78) to the +run-test executable. + + ./run-test 5 diff --git a/tools/labs/templates/assignments/3-raid/checker/_checker b/tools/labs/templates/assignments/3-raid/checker/_checker new file mode 100755 index 00000000000000..da4e4058d736be --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_checker @@ -0,0 +1,4 @@ +#!/bin/sh + +/bin/dmesg -c > /dev/null 2>&1 +./run-test diff --git a/tools/labs/templates/assignments/3-raid/checker/_test/Makefile b/tools/labs/templates/assignments/3-raid/checker/_test/Makefile new file mode 100644 index 00000000000000..a8a98a2cb2220d --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_test/Makefile @@ -0,0 +1,15 @@ +CFLAGS = -Wall -Wextra -Wno-unused-function -g -m32 +LDFLAGS = -static -m32 + +.PHONY: all clean + +all: run-test + +run-test: run-test.o test.o + +run-test.o: run-test.c run-test.h + +test.o: test.c run-test.h + +clean: + -rm -f *~ test.o run-test.o run-test test diff --git a/tools/labs/templates/assignments/3-raid/checker/_test/run-test.c b/tools/labs/templates/assignments/3-raid/checker/_test/run-test.c new file mode 100644 index 00000000000000..f793af74439be8 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_test/run-test.c @@ -0,0 +1,104 @@ +#include +#include +#include +#include +#include + +#include "run-test.h" + +/* Enable/disable exiting when program fails. 
*/
+//#define EXIT_IF_FAIL
+
+/* Index into test_array of the test currently being run. */
+static size_t test_index;
+/* Sum of the points awarded to the tests that passed so far. */
+static size_t total_points = 0;
+
+/*
+ * Report a failed test; a failed test always scores 0, so @points is
+ * intentionally unused. When EXIT_IF_FAIL is defined the program exits
+ * at the first failure.
+ */
+static void test_do_fail(size_t points)
+{
+	(void) points;
+	printf("failed [ 0/%3zu]\n", max_points);
+#ifdef EXIT_IF_FAIL
+	exit(EXIT_FAILURE);
+#endif
+}
+
+/* Report a passed test and add its @points to the running total. */
+static void test_do_pass(size_t points)
+{
+	total_points += points;
+	printf("passed [%3zu/%3zu]\n", points, max_points);
+}
+
+/*
+ * Print the result line of the current test (test_array[test_index]) and
+ * record it as passed when @condition is non-zero, failed otherwise.
+ */
+void basic_test(int condition)
+{
+	size_t i;
+	char *description = test_array[test_index].description;
+	size_t desc_len = strlen(description);
+	size_t points = test_array[test_index].points;
+
+	printf("(%3zu) %s", test_index + 1, description);
+	/*
+	 * Pad with dots up to column 56. Counting up from desc_len (instead
+	 * of up to 56 - desc_len) avoids a size_t wrap-around, and thus a
+	 * practically endless loop, when the description is longer than 56.
+	 */
+	for (i = desc_len; i < 56; i++)
+		printf(".");
+	if (condition)
+		test_do_pass(points);
+	else
+		test_do_fail(points);
+}
+
+/* Print the right-aligned "Total" line once all tests have been run. */
+static void print_test_total(void)
+{
+	size_t i;
+
+	for (i = 0; i < 62; i++)
+		printf(" ");
+	printf("Total: [%3zu/%3zu]\n", total_points, max_points);
+}
+
+/* Invoke the test function selected by test_index. */
+static void run_test(void)
+{
+	test_array[test_index].function();
+}
+
+/*
+ * Without arguments run every test and print the total; with a single
+ * argument run only the test with that 1-based number.
+ */
+int main(int argc, char **argv)
+{
+	size_t num_tests = get_num_tests();
+	char *end;
+
+	if (argc > 2) {
+		fprintf(stderr, "Usage: %s [test_number]\n", argv[0]);
+		fprintf(stderr, " 1 <= test_number <= %zu\n", num_tests);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Randomize time quantums. */
+	srand(time(NULL));
+
+	/* In case of no arguments run all tests. */
+	if (argc == 1) {
+		init_world();
+		for (test_index = 0; test_index < num_tests; test_index++)
+			run_test();
+		print_test_total();
+		cleanup_world();
+		return 0;
+	}
+
+	/*
+	 * If provided, argument is test index. errno must be cleared before
+	 * the call because strtoul() only sets it on failure, and the end
+	 * pointer is what reveals empty or trailing non-numeric input.
+	 */
+	errno = 0;
+	test_index = strtoul(argv[1], &end, 10);
+	if (errno != 0 || end == argv[1] || *end != '\0') {
+		fprintf(stderr, "%s is not a number\n", argv[1]);
+		exit(EXIT_FAILURE);
+	}
+
+	if (test_index == 0 || test_index > num_tests) {
+		fprintf(stderr, "Error: Test index is out of range "
+				"(1 <= test_index <= %zu).\n", num_tests);
+		exit(EXIT_FAILURE);
+	}
+
+	/* test_index is one less than what the user provides. */
+	test_index--;
+
+	/* Run test_index test. 
*/ + init_world(); + run_test(); + cleanup_world(); + + return 0; +} diff --git a/tools/labs/templates/assignments/3-raid/checker/_test/run-test.h b/tools/labs/templates/assignments/3-raid/checker/_test/run-test.h new file mode 100644 index 00000000000000..e4d64f6aa1b375 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_test/run-test.h @@ -0,0 +1,25 @@ +#ifndef _RUN_TEST_H_ +#define _RUN_TEST_H_ + +/* functions exported by the framework */ +void basic_test(int condition); + +/* function exported by the test */ +void init_world(void); +void cleanup_world(void); +size_t get_num_tests(void); + +/* test function prototype */ +typedef void (test_f)(void); + +struct run_test_t { + test_f *function; /* test/evaluation function */ + char *description; /* test description */ + size_t points; /* points for each test */ +}; + +/* Use test_index to pass through test_array. */ +extern struct run_test_t test_array[]; +extern size_t max_points; + +#endif /* _RUN_TEST_H_ */ diff --git a/tools/labs/templates/assignments/3-raid/checker/_test/ssr.h b/tools/labs/templates/assignments/3-raid/checker/_test/ssr.h new file mode 100644 index 00000000000000..5aa4107fb15825 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_test/ssr.h @@ -0,0 +1,26 @@ +/* + * Simple Software Raid - Linux header file + */ + +#ifndef SSR_H_ +#define SSR_H_ 1 + +#define SSR_MAJOR 240 +#define SSR_FIRST_MINOR 0 +#define SSR_NUM_MINORS 1 + +#define PHYSICAL_DISK1_NAME "/dev/vdb" +#define PHYSICAL_DISK2_NAME "/dev/vdc" + +/* sector size */ +#define KERNEL_SECTOR_SIZE 512 + +/* physical partition size - 95 MB (more than this results in error) */ +#define LOGICAL_DISK_NAME "/dev/ssr" +#define LOGICAL_DISK_SIZE (95 * 1024 * 1024) +#define LOGICAL_DISK_SECTORS ((LOGICAL_DISK_SIZE) / (KERNEL_SECTOR_SIZE)) + +/* sync data */ +#define SSR_IOCTL_SYNC 1 + +#endif diff --git a/tools/labs/templates/assignments/3-raid/checker/_test/test.c 
b/tools/labs/templates/assignments/3-raid/checker/_test/test.c new file mode 100644 index 00000000000000..b6a36ec618aee1 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/checker/_test/test.c @@ -0,0 +1,1769 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "run-test.h" +#include "ssr.h" + +#define SSR_BASE_NAME "ssr" +#define SSR_LIN_EXT ".ko" +#define SSR_MOD_NAME SSR_BASE_NAME SSR_LIN_EXT + +#define CRC_SIZE 4 + +#define ONE_SECTOR KERNEL_SECTOR_SIZE +#define ONE_PAGE 4096 +#define TWO_PAGES 8192 +#define TEN_PAGES 40960 +#define ONE_MEG 1048576 + +/* Read/write buffers. */ +static unsigned char *log_rd_buf, *log_wr_buf; +static unsigned char *phys1_rd_buf, *phys1_wr_buf; +static unsigned char *phys2_rd_buf, *phys2_wr_buf; +static unsigned char *log_rd_crc, *log_wr_crc; +static unsigned char *phys1_rd_crc, *phys1_wr_crc; +static unsigned char *phys2_rd_crc, *phys2_wr_crc; + +/* File descriptors. */ +static int log_fd, phys1_fd, phys2_fd; + +enum { + START = 0, + MIDDLE, + END +}; + +enum { + PHYS_FILL_DATA = 'P', + LOG_FILL_DATA = 'L', + CORRUPT_DATA = 'C', + PHYS1_DISK_DIRTY_DATA = 'a', + PHYS1_BUF_DIRTY_DATA = 'A', + PHYS2_DISK_DIRTY_DATA = 'b', + PHYS2_BUF_DIRTY_DATA = 'B', + LOG_DISK_DIRTY_DATA = 'd', + LOG_BUF_DIRTY_DATA = 'D', +}; + +/* + * "upgraded" read routine + */ + +static ssize_t xread(int fd, void *buffer, size_t len) +{ + ssize_t ret; + ssize_t n; + + n = 0; + while (n < (ssize_t) len) { + ret = read(fd, (char *) buffer + n, len - n); + if (ret < 0) + return -1; + if (ret == 0) + break; + n += ret; + } + + return n; +} + +/* + * "upgraded" write routine + */ + +static ssize_t xwrite(int fd, const void *buffer, size_t len) +{ + ssize_t ret; + ssize_t n; + + n = 0; + while (n < (ssize_t) len) { + ret = write(fd, (const char *) buffer + n, len - n); + if (ret < 0) + return -1; + if (ret == 0) + break; + n += ret; + } + + return n; +} + +/* + * Compute CRC32. 
+ */ + +static unsigned int crc32(unsigned int seed, + const unsigned char *p, unsigned int len) +{ + size_t i; + unsigned int crc = seed; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0); + } + + return crc; +} + +static void compute_crc(const void *data_buffer, void *crc_buffer, size_t len) +{ + size_t i; + unsigned int crc; + + for (i = 0; i < len; i += ONE_SECTOR) { + crc = crc32(0, (const unsigned char *) data_buffer + i, ONE_SECTOR); + memcpy((char *) crc_buffer + i / ONE_SECTOR * CRC_SIZE, + &crc, CRC_SIZE); + } +} + +static off_t data_offset_from_whence(int whence, size_t len) +{ + switch (whence) { + case START: + return 0; + case MIDDLE: + return LOGICAL_DISK_SIZE / 2 - len; + case END: + return LOGICAL_DISK_SIZE - len; + default: + return -1; + } +} + +static off_t crc_offset_from_whence(int whence, size_t len) +{ + off_t data_offset = data_offset_from_whence(whence, len); + + return LOGICAL_DISK_SIZE + data_offset / ONE_SECTOR * CRC_SIZE; +} + +static void fill_buffer(void *buffer, int c, size_t len) +{ + memset(buffer, c, len); +} + +static void log_fill_buffer(size_t len) +{ + fill_buffer(log_wr_buf, LOG_FILL_DATA, len); +} + +static void phys_fill_buffer(size_t len) +{ + fill_buffer(phys1_wr_buf, PHYS_FILL_DATA, len); + fill_buffer(phys2_wr_buf, PHYS_FILL_DATA, len); +} + +static ssize_t read_whence_data(int fd, void *buffer, size_t len, int whence) +{ + off_t offset = data_offset_from_whence(whence, len); + + lseek(fd, offset, SEEK_SET); + return xread(fd, buffer, len); +} + +static ssize_t read_whence_crc(int fd, void *crc_buffer, size_t data_len, + int whence) +{ + off_t offset = crc_offset_from_whence(whence, data_len); + + lseek(fd, offset, SEEK_SET); + return xread(fd, crc_buffer, data_len / ONE_SECTOR * CRC_SIZE); +} + +static ssize_t write_whence_data(int fd, const void *buffer, + size_t len, int whence) +{ + off_t offset = data_offset_from_whence(whence, len); + + lseek(fd, 
offset, SEEK_SET); + return xwrite(fd, buffer, len); +} + +static ssize_t write_whence_crc(int fd, void *crc_buffer, size_t data_len, + int whence) +{ + off_t offset = crc_offset_from_whence(whence, data_len); + + lseek(fd, offset, SEEK_SET); + return xwrite(fd, crc_buffer, data_len / ONE_SECTOR * CRC_SIZE); +} + +static ssize_t log_read_whence(size_t len, int whence) +{ + ssize_t n; + + n = read_whence_data(log_fd, log_rd_buf, len, whence); + if (n < 0) + return -1; + compute_crc(log_rd_buf, log_rd_crc, len); + return n; +} + +static ssize_t log_write_whence(size_t len, int whence) +{ + compute_crc(log_wr_buf, log_wr_crc, len); + return write_whence_data(log_fd, log_wr_buf, len, whence); +} + +static ssize_t phys_read_whence(size_t id, size_t len, int whence) +{ + ssize_t n_data, n_crc; + int fd = ((id == 1) ? phys1_fd : phys2_fd); + + unsigned char *data_buf = ((id == 1) ? phys1_rd_buf : phys2_rd_buf); + unsigned char *crc_buf = ((id == 1) ? phys1_rd_crc : phys2_rd_crc); + + n_data = read_whence_data(fd, data_buf, len, whence); + if (n_data < 0) + return -1; + n_crc = read_whence_crc(fd, crc_buf, len, whence); + if (n_crc < 0) + return -1; + return n_data; +} + +static ssize_t phys_write_whence(size_t id, size_t len, int whence) +{ + ssize_t n_data, n_crc; + int fd = ((id == 1) ? phys1_fd : phys2_fd); + unsigned char *data_buf = ((id == 1) ? phys1_wr_buf : phys2_wr_buf); + unsigned char *crc_buf = ((id == 1) ? 
phys1_wr_crc : phys2_wr_crc); + + compute_crc(data_buf, crc_buf, len); + n_data = write_whence_data(fd, data_buf, len, whence); + if (n_data < 0) + return -1; + n_crc = write_whence_crc(fd, crc_buf, len, whence); + if (n_crc < 0) + return -1; + return n_data; +} + +static void corrupt_buffer(void *buffer, size_t sectors) +{ + size_t i; + + for (i = 0; i < sectors; i++) + ((unsigned char *) buffer)[i * ONE_SECTOR] = CORRUPT_DATA; +} + +static ssize_t phys_corrupt_and_write_whence(size_t id, size_t len, + size_t sectors, int whence) +{ + ssize_t n_data, n_crc; + int fd = ((id == 1) ? phys1_fd : phys2_fd); + unsigned char *data_buf = ((id == 1) ? phys1_wr_buf : phys2_wr_buf); + unsigned char *crc_buf = ((id == 1) ? phys1_wr_crc : phys2_wr_crc); + + compute_crc(data_buf, crc_buf, len); + corrupt_buffer(data_buf, sectors); + n_data = write_whence_data(fd, data_buf, len, whence); + if (n_data < 0) + return -1; + n_crc = write_whence_crc(fd, crc_buf, len, whence); + if (n_crc < 0) + return -1; + return n_data; +} + +static ssize_t log_read_start(size_t len) +{ + return log_read_whence(len, START); +} + +static ssize_t log_read_middle(size_t len) +{ + return log_read_whence(len, MIDDLE); +} + +static ssize_t log_read_end(size_t len) +{ + return log_read_whence(len, END); +} + +static ssize_t log_write_start(size_t len) +{ + return log_write_whence(len, START); +} + +static ssize_t log_write_middle(size_t len) +{ + return log_write_whence(len, MIDDLE); +} + +static ssize_t log_write_end(size_t len) +{ + return log_write_whence(len, END); +} + +static ssize_t phys1_read_start(size_t len) +{ + return phys_read_whence(1, len, START); +} + +#if 0 +static ssize_t phys1_read_middle(size_t len) +{ + return phys_read_whence(1, len, MIDDLE); +} + +static ssize_t phys1_read_end(size_t len) +{ + return phys_read_whence(1, len, END); +} +#endif + +static ssize_t phys1_write_start(size_t len) +{ + return phys_write_whence(1, len, START); +} + +static ssize_t 
phys1_corrupt_and_write_start(size_t len, size_t sectors) +{ + return phys_corrupt_and_write_whence(1, len, sectors, START); +} + +#if 0 +static ssize_t phys1_write_middle(size_t len) +{ + return phys_write_whence(1, len, MIDDLE); +} + +static ssize_t phys1_write_end(size_t len) +{ + return phys_write_whence(1, len, END); +} +#endif + +static ssize_t phys2_read_start(size_t len) +{ + return phys_read_whence(2, len, START); +} + +#if 0 +static ssize_t phys2_read_middle(size_t len) +{ + return phys_read_whence(2, len, MIDDLE); +} + +static ssize_t phys2_read_end(size_t len) +{ + return phys_read_whence(2, len, END); +} +#endif + +static ssize_t phys2_write_start(size_t len) +{ + return phys_write_whence(2, len, START); +} + +static ssize_t phys2_corrupt_and_write_start(size_t len, size_t sectors) +{ + return phys_corrupt_and_write_whence(2, len, sectors, START); +} + +#if 0 +static ssize_t phys2_write_middle(size_t len) +{ + return phys_write_whence(2, len, MIDDLE); +} + +static ssize_t phys2_write_end(size_t len) +{ + return phys_write_whence(2, len, END); +} +#endif + +static int cmp_data_log_rd_phys1_wr(size_t len) +{ + return memcmp(log_rd_buf, phys1_wr_buf, len); +} + +static int cmp_data_log_rd_phys2_wr(size_t len) +{ + return memcmp(log_rd_buf, phys2_wr_buf, len); +} + +static int cmp_data_log_rd_phys1_rd(size_t len) +{ + return memcmp(log_rd_buf, phys1_rd_buf, len); +} + +static int cmp_data_log_rd_phys2_rd(size_t len) +{ + return memcmp(log_rd_buf, phys2_rd_buf, len); +} + +static int cmp_data_log_wr_phys1_rd(size_t len) +{ + return memcmp(log_wr_buf, phys1_rd_buf, len); +} + +static int cmp_data_log_wr_phys2_rd(size_t len) +{ + return memcmp(log_wr_buf, phys2_rd_buf, len); +} + +static int cmp_crc_log_rd_phys1_wr(size_t data_len) +{ + return memcmp(log_rd_crc, phys1_wr_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static int cmp_crc_log_rd_phys2_wr(size_t data_len) +{ + return memcmp(log_rd_crc, phys2_wr_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static 
int cmp_crc_log_rd_phys1_rd(size_t data_len) +{ + return memcmp(log_rd_crc, phys1_rd_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static int cmp_crc_log_rd_phys2_rd(size_t data_len) +{ + return memcmp(log_rd_crc, phys2_rd_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static int cmp_crc_log_wr_phys1_rd(size_t data_len) +{ + return memcmp(log_wr_crc, phys1_rd_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static int cmp_crc_log_wr_phys2_rd(size_t data_len) +{ + return memcmp(log_wr_crc, phys2_rd_crc, data_len / ONE_SECTOR * CRC_SIZE); +} + +static void drop_caches(void) +{ + int fd; + char buf[] = "1\n"; + + fd = open("/proc/sys/vm/drop_caches", O_WRONLY); + assert(fd >= 0); + write(fd, buf, strlen(buf)); + close(fd); +} + +static void flush_disk_buffers(void) +{ + sync(); + //system("/bin/echo 1 > /proc/sys/vm/drop_caches"); + drop_caches(); +} + +static void dump_data(const void *buf, size_t len, const char *header) +{ + size_t i; + + printf("%s:", header); + for (i = 0; i < len / sizeof(unsigned int); i++) { + if (i % 4 == 0) + printf("\n\t"); + printf(" %08x", ((unsigned int *) buf)[i]); + } + printf("\n\n"); +} + +void init_world(void) +{ + /* Cleanup if required. 
*/
+	flush_disk_buffers();
+	system("/sbin/rmmod " SSR_BASE_NAME " > /dev/null 2>&1");
+	system("/bin/cat /proc/devices | /bin/grep " SSR_BASE_NAME " > /dev/null");
+	system("/bin/rm -f " LOGICAL_DISK_NAME);
+
+	/*
+	 * Load the module under test and make sure the device is registered
+	 * and all three disk nodes exist.
+	 * NOTE(review): these commands run inside assert(); building the
+	 * checker with NDEBUG would skip them altogether.
+	 */
+	assert(system("/sbin/insmod " SSR_MOD_NAME) == 0);
+	assert(system("/bin/cat /proc/devices | /bin/grep " SSR_BASE_NAME
+		      " > /dev/null") == 0);
+	assert(access(PHYSICAL_DISK1_NAME, F_OK) == 0);
+	assert(access(PHYSICAL_DISK2_NAME, F_OK) == 0);
+	assert(access(LOGICAL_DISK_NAME, F_OK) == 0);
+
+	/*
+	 * Data buffers hold ONE_MEG bytes (the former literal 1024 * 1024);
+	 * CRC buffers hold CRC_SIZE bytes per sector of such a buffer (the
+	 * former literal 8 * 1024). Each assert checks the pointer that was
+	 * just allocated.
+	 */
+	log_rd_buf = calloc(ONE_MEG, 1);
+	assert(log_rd_buf != NULL);
+	log_wr_buf = calloc(ONE_MEG, 1);
+	assert(log_wr_buf != NULL);
+	phys1_rd_buf = calloc(ONE_MEG, 1);
+	assert(phys1_rd_buf != NULL);
+	phys1_wr_buf = calloc(ONE_MEG, 1);
+	assert(phys1_wr_buf != NULL);
+	phys2_rd_buf = calloc(ONE_MEG, 1);
+	assert(phys2_rd_buf != NULL);
+	phys2_wr_buf = calloc(ONE_MEG, 1);
+	assert(phys2_wr_buf != NULL);
+	log_rd_crc = calloc(ONE_MEG / ONE_SECTOR * CRC_SIZE, 1);
+	assert(log_rd_crc != NULL);
+	log_wr_crc = calloc(ONE_MEG / ONE_SECTOR * CRC_SIZE, 1);
+	assert(log_wr_crc != NULL);
+	phys1_rd_crc = calloc(ONE_MEG / ONE_SECTOR * CRC_SIZE, 1);
+	assert(phys1_rd_crc != NULL);
+	phys1_wr_crc = calloc(ONE_MEG / ONE_SECTOR * CRC_SIZE, 1);
+	assert(phys1_wr_crc != NULL);
+	phys2_rd_crc = calloc(ONE_MEG / ONE_SECTOR * CRC_SIZE, 1);
+	assert(phys2_rd_crc != NULL);
+	phys2_wr_crc = calloc(ONE_MEG / ONE_SECTOR * CRC_SIZE, 1);
+	assert(phys2_wr_crc != NULL);
+}
+
+/*
+ * Undo init_world(): unload the module, remove the logical disk node and
+ * release all data and CRC buffers.
+ */
+void cleanup_world(void)
+{
+	flush_disk_buffers();
+	system("/sbin/rmmod " SSR_BASE_NAME);
+	system("/bin/cat /proc/devices | /bin/grep " SSR_BASE_NAME " > /dev/null");
+	system("/bin/rm -f " LOGICAL_DISK_NAME);
+	free(log_rd_buf); free(log_wr_buf);
+	free(phys1_rd_buf); free(phys1_wr_buf);
+	free(phys2_rd_buf); free(phys2_wr_buf);
+	free(log_rd_crc); free(log_wr_crc);
+	free(phys1_rd_crc); free(phys1_wr_crc);
+	free(phys2_rd_crc); free(phys2_wr_crc);
+}
+
+static void make_disks_dirty(void)
+{
+	fill_buffer(phys1_wr_buf, PHYS1_DISK_DIRTY_DATA, ONE_MEG);
+	fill_buffer(phys1_wr_crc, PHYS1_DISK_DIRTY_DATA, ONE_MEG / ONE_SECTOR * 
CRC_SIZE); + fill_buffer(phys2_wr_buf, PHYS2_DISK_DIRTY_DATA, ONE_MEG); + fill_buffer(phys2_wr_crc, PHYS2_DISK_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + phys1_write_start(ONE_MEG); + phys2_write_start(ONE_MEG); +} + +static void make_buffers_dirty(void) +{ + fill_buffer(phys1_wr_buf, PHYS1_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(phys1_wr_crc, PHYS1_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + fill_buffer(phys1_rd_buf, PHYS1_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(phys1_rd_crc, PHYS1_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + fill_buffer(phys2_wr_buf, PHYS2_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(phys2_wr_crc, PHYS2_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + fill_buffer(phys2_rd_buf, PHYS2_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(phys2_rd_crc, PHYS2_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + fill_buffer(log_wr_buf, LOG_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(log_wr_crc, LOG_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); + fill_buffer(log_rd_buf, LOG_BUF_DIRTY_DATA, ONE_MEG); + fill_buffer(log_rd_crc, LOG_BUF_DIRTY_DATA, ONE_MEG / ONE_SECTOR * CRC_SIZE); +} + +static void init_test(void) +{ + flush_disk_buffers(); + log_fd = open(LOGICAL_DISK_NAME, O_RDWR); + assert(log_fd >= 0); + phys1_fd = open(PHYSICAL_DISK1_NAME, O_RDWR); + assert(phys1_fd >= 0); + phys2_fd = open(PHYSICAL_DISK2_NAME, O_RDWR); + assert(phys2_fd >= 0); + make_disks_dirty(); + make_buffers_dirty(); + flush_disk_buffers(); +} + +static void cleanup_test(void) +{ + close(log_fd); + close(phys1_fd); + close(phys2_fd); +} + +static void open_logical(void) +{ + int fd; + + fd = open(LOGICAL_DISK_NAME, O_RDWR); + basic_test(fd >= 0); + close(fd); +} + +static void close_logical(void) +{ + int fd, rc; + + fd = open(LOGICAL_DISK_NAME, O_RDWR); + rc = close(fd); + basic_test(rc == 0); +} + +static void use_after_close_invalid(void) +{ + int fd, val; + ssize_t n; + + fd = open(LOGICAL_DISK_NAME, O_RDWR); + close(fd); + n = read(fd, &val, sizeof(val)); + basic_test(n 
< 0); +} + +static void lseek_logical(void) +{ + off_t offset; + + init_test(); + offset = lseek(log_fd, LOGICAL_DISK_SIZE / 2, SEEK_SET); + basic_test(offset == LOGICAL_DISK_SIZE / 2); + cleanup_test(); +} + +static void read_one_sector_start(void) +{ + ssize_t n; + + init_test(); + n = log_read_start(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void read_one_sector_middle(void) +{ + ssize_t n; + + init_test(); + n = log_read_middle(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void read_one_sector_end(void) +{ + ssize_t n; + + init_test(); + n = log_read_end(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void write_one_sector_start(void) +{ + ssize_t n; + + init_test(); + n = log_write_start(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void write_one_sector_middle(void) +{ + ssize_t n; + + init_test(); + n = log_write_middle(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void write_one_sector_end(void) +{ + ssize_t n; + + init_test(); + n = log_write_end(ONE_SECTOR); + basic_test(n == ONE_SECTOR); + cleanup_test(); +} + +static void read_one_page_start(void) +{ + ssize_t n; + + init_test(); + n = log_read_start(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void read_one_page_middle(void) +{ + ssize_t n; + + init_test(); + n = log_read_middle(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void read_one_page_end(void) +{ + ssize_t n; + + init_test(); + n = log_read_end(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void write_one_page_start(void) +{ + ssize_t n; + + init_test(); + n = log_write_start(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void write_one_page_middle(void) +{ + ssize_t n; + + init_test(); + n = log_write_middle(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void 
write_one_page_end(void) +{ + ssize_t n; + + init_test(); + n = log_write_end(ONE_PAGE); + basic_test(n == ONE_PAGE); + cleanup_test(); +} + +static void read_two_pages_start(void) +{ + ssize_t n; + + init_test(); + n = log_read_start(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void read_two_pages_middle(void) +{ + ssize_t n; + + init_test(); + n = log_read_middle(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void read_two_pages_end(void) +{ + ssize_t n; + + init_test(); + n = log_read_end(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void write_two_pages_start(void) +{ + ssize_t n; + + init_test(); + n = log_write_start(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void write_two_pages_middle(void) +{ + ssize_t n; + + init_test(); + n = log_write_middle(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void write_two_pages_end(void) +{ + ssize_t n; + + init_test(); + n = log_write_end(TWO_PAGES); + basic_test(n == TWO_PAGES); + cleanup_test(); +} + +static void read_one_meg_start(void) +{ + ssize_t n; + + init_test(); + n = log_read_start(ONE_MEG); + basic_test(n == ONE_MEG); + cleanup_test(); +} + +static void read_one_meg_middle(void) +{ + ssize_t n; + + init_test(); + n = log_read_middle(ONE_MEG); + basic_test(n == ONE_MEG); + cleanup_test(); +} + +static void read_one_meg_end(void) +{ + ssize_t n; + + init_test(); + n = log_read_end(ONE_MEG); + basic_test(n == ONE_MEG); + cleanup_test(); +} + +static void write_one_meg_start(void) +{ + ssize_t n; + + init_test(); + n = log_write_start(ONE_MEG); + basic_test(n == ONE_MEG); + cleanup_test(); +} + +static void write_one_meg_middle(void) +{ + ssize_t n; + + init_test(); + n = log_write_middle(ONE_MEG); + basic_test(n == ONE_MEG); + cleanup_test(); +} + +static void write_one_meg_end(void) +{ + ssize_t n; + + init_test(); + n = log_write_end(ONE_MEG); + basic_test(n == ONE_MEG); + 
cleanup_test(); +} + +static void read_boundary_one_sector(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xread(log_fd, log_rd_buf, ONE_SECTOR); + basic_test(n == 0); + cleanup_test(); +} + +static void read_boundary_one_page(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xread(log_fd, log_rd_buf, ONE_PAGE); + basic_test(n == 0); + cleanup_test(); +} + +static void read_boundary_two_pages(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xread(log_fd, log_rd_buf, TWO_PAGES); + basic_test(n == 0); + cleanup_test(); +} + +static void read_boundary_one_meg(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xread(log_fd, log_rd_buf, ONE_MEG); + basic_test(n == 0); + cleanup_test(); +} + +static void write_boundary_one_sector(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xwrite(log_fd, log_rd_buf, ONE_SECTOR); + basic_test(n < 0); + cleanup_test(); +} + +static void write_boundary_one_page(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xwrite(log_fd, log_wr_buf, ONE_PAGE); + basic_test(n < 0); + cleanup_test(); +} + +static void write_boundary_two_pages(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xwrite(log_fd, log_wr_buf, TWO_PAGES); + basic_test(n < 0); + cleanup_test(); +} + +static void write_boundary_one_meg(void) +{ + ssize_t n; + + init_test(); + lseek(log_fd, LOGICAL_DISK_SIZE, SEEK_SET); + n = xwrite(log_fd, log_wr_buf, ONE_MEG); + basic_test(n < 0); + cleanup_test(); +} + +static size_t get_free_memory(void) +{ + FILE *f; + size_t i; + char buf[256]; + char *p; + + f = fopen("/proc/meminfo", "rt"); + assert(f != NULL); + /* Second line is 'MemFree: ...' 
*/
+	/* An unreadable file leaves an empty string, caught below. */
+	if (fgets(buf, sizeof(buf), f) == NULL ||
+	    fgets(buf, sizeof(buf), f) == NULL)
+		buf[0] = '\0';
+	fclose(f);
+
+	/* Find the first ':'; never look past the end of the string. */
+	p = NULL;
+	for (i = 0; i < sizeof(buf) && buf[i] != '\0'; i++)
+		if (buf[i] == ':') {
+			p = buf + i + 1;
+			break;
+		}
+
+	/* Passing NULL to strtoul() is undefined; fail loudly instead. */
+	assert(p != NULL);
+	return strtoul(p, NULL, 10);
+}
+
+/*
+ * Write ONE_MEG to the start of the logical disk five times and check
+ * that the value reported by get_free_memory() moved by less than 3072
+ * in either direction.
+ */
+static void memory_is_freed(void)
+{
+	size_t mem_free_before, mem_free_after;
+	size_t i;
+
+	init_test();
+	mem_free_before = get_free_memory();
+	for (i = 0; i < 5; i++)
+		log_write_start(ONE_MEG);
+	mem_free_after = get_free_memory();
+
+	/* We assume 3MB (3072KB) is a reasonable memory usage in writes. */
+	basic_test(mem_free_after < mem_free_before + 3072 &&
+		   mem_free_before < mem_free_after + 3072);
+	cleanup_test();
+}
+
+/*
+ * Write one sector through the logical disk, flush, and check that the
+ * same data is read back from the start of the first physical disk.
+ */
+static void write_one_sector_check_phys1(void)
+{
+	int rc;
+	size_t len = ONE_SECTOR;
+
+	init_test();
+	log_fill_buffer(len);
+	log_write_start(len);
+	flush_disk_buffers();
+	phys1_read_start(len);
+	rc = cmp_data_log_wr_phys1_rd(len);
+	basic_test(rc == 0);
+	cleanup_test();
+}
+
+/* Same check as write_one_sector_check_phys1(), for one page. */
+static void write_one_page_check_phys1(void)
+{
+	int rc;
+	size_t len = ONE_PAGE;
+
+	init_test();
+	log_fill_buffer(len);
+	log_write_start(len);
+	flush_disk_buffers();
+	phys1_read_start(len);
+	rc = cmp_data_log_wr_phys1_rd(len);
+	basic_test(rc == 0);
+	cleanup_test();
+}
+
+/* Same check as write_one_sector_check_phys1(), for two pages. */
+static void write_two_pages_check_phys1(void)
+{
+	int rc;
+	size_t len = TWO_PAGES;
+
+	init_test();
+	log_fill_buffer(len);
+	log_write_start(len);
+	flush_disk_buffers();
+	phys1_read_start(len);
+	rc = cmp_data_log_wr_phys1_rd(len);
+	basic_test(rc == 0);
+	cleanup_test();
+}
+
+/* Same check as write_one_sector_check_phys1(), for one megabyte. */
+static void write_one_meg_check_phys1(void)
+{
+	int rc;
+	size_t len = ONE_MEG;
+
+	init_test();
+	log_fill_buffer(len);
+	log_write_start(len);
+	flush_disk_buffers();
+	phys1_read_start(len);
+	rc = cmp_data_log_wr_phys1_rd(len);
+	basic_test(rc == 0);
+	cleanup_test();
+}
+
+static void write_one_sector_check_phys(void)
+{
+	int rc1, rc2;
+	size_t len = ONE_SECTOR;
+
+	init_test();
+	log_fill_buffer(len);
+	log_write_start(len);
+	flush_disk_buffers();
+	phys1_read_start(len);
+	phys2_read_start(len);
+	rc1 = 
cmp_data_log_wr_phys1_rd(len); + rc2 = cmp_data_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void write_one_page_check_phys(void) +{ + int rc1, rc2; + size_t len = ONE_PAGE; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_data_log_wr_phys1_rd(len); + rc2 = cmp_data_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void write_two_pages_check_phys(void) +{ + int rc1, rc2; + size_t len = TWO_PAGES; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_data_log_wr_phys1_rd(len); + rc2 = cmp_data_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void write_one_meg_check_phys(void) +{ + int rc1, rc2; + size_t len = ONE_MEG; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_data_log_wr_phys1_rd(len); + rc2 = cmp_data_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void read_one_sector_after_write(void) +{ + int rc; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc = cmp_data_log_rd_phys1_wr(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void read_one_page_after_write(void) +{ + int rc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc = cmp_data_log_rd_phys1_wr(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void read_two_pages_after_write(void) +{ + int rc; + size_t len = TWO_PAGES; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + 
phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc = cmp_data_log_rd_phys1_wr(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void read_one_meg_after_write(void) +{ + int rc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc = cmp_data_log_rd_phys1_wr(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_sector_check_phys1_crc(void) +{ + int rc; + size_t len = ONE_SECTOR; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_crc_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_page_check_phys1_crc(void) +{ + int rc; + size_t len = ONE_PAGE; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_crc_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_two_pages_check_phys1_crc(void) +{ + int rc; + size_t len = TWO_PAGES; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_crc_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_meg_check_phys1_crc(void) +{ + int rc; + size_t len = ONE_MEG; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc = cmp_crc_log_wr_phys1_rd(len); + basic_test(rc == 0); + cleanup_test(); +} + +static void write_one_sector_check_phys_crc(void) +{ + int rc1, rc2; + size_t len = ONE_SECTOR; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_crc_log_wr_phys1_rd(len); + rc2 = cmp_crc_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void 
write_one_page_check_phys_crc(void) +{ + int rc1, rc2; + size_t len = ONE_PAGE; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_crc_log_wr_phys1_rd(len); + rc2 = cmp_crc_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void write_two_pages_check_phys_crc(void) +{ + int rc1, rc2; + size_t len = TWO_PAGES; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_crc_log_wr_phys1_rd(len); + rc2 = cmp_crc_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void write_one_meg_check_phys_crc(void) +{ + int rc1, rc2; + size_t len = ONE_MEG; + + init_test(); + log_fill_buffer(len); + log_write_start(len); + flush_disk_buffers(); + phys1_read_start(len); + phys2_read_start(len); + rc1 = cmp_crc_log_wr_phys1_rd(len); + rc2 = cmp_crc_log_wr_phys2_rd(len); + basic_test(rc1 == 0 && rc2 == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_sector_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, 1); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys2_wr(len); + rc_crc = cmp_crc_log_rd_phys2_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_sector_in_page_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, 1); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys2_wr(len); + rc_crc = cmp_crc_log_rd_phys2_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_page_disk1(void) +{ + int rc_data, 
rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, ONE_PAGE / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys2_wr(len); + rc_crc = cmp_crc_log_rd_phys2_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_ten_pages_in_one_meg_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, TEN_PAGES / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys2_wr(len); + rc_crc = cmp_crc_log_rd_phys2_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_meg_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, ONE_MEG / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys2_wr(len); + rc_crc = cmp_crc_log_rd_phys2_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_sector_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, 1); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc_data = cmp_data_log_rd_phys1_rd(len); + rc_crc = cmp_crc_log_rd_phys1_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_sector_in_page_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, 1); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys1_read_start(len); + 
rc_data = cmp_data_log_rd_phys1_rd(len); + rc_crc = cmp_crc_log_rd_phys1_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_page_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, ONE_PAGE / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc_data = cmp_data_log_rd_phys1_rd(len); + rc_crc = cmp_crc_log_rd_phys1_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_ten_page_in_one_meg_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, TEN_PAGES / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc_data = cmp_data_log_rd_phys1_rd(len); + rc_crc = cmp_crc_log_rd_phys1_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_meg_disk1(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, ONE_MEG / ONE_SECTOR); + phys2_write_start(len); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys1_read_start(len); + rc_data = cmp_data_log_rd_phys1_rd(len); + rc_crc = cmp_crc_log_rd_phys1_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_sector_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, 1); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys1_wr(len); + rc_crc = cmp_crc_log_rd_phys1_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void 
corrupt_read_correct_one_sector_in_page_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, 1); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys1_wr(len); + rc_crc = cmp_crc_log_rd_phys1_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_one_page_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, ONE_PAGE / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys1_wr(len); + rc_crc = cmp_crc_log_rd_phys1_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void corrupt_read_correct_ten_pages_in_one_meg_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, TEN_PAGES / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys1_wr(len); + rc_crc = cmp_crc_log_rd_phys1_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + flush_disk_buffers(); + cleanup_test(); +} + +static void corrupt_read_correct_one_meg_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, ONE_MEG / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + rc_data = cmp_data_log_rd_phys1_wr(len); + rc_crc = cmp_crc_log_rd_phys1_wr(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_sector_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, 1); + flush_disk_buffers(); + log_read_start(len); + 
flush_disk_buffers(); + phys2_read_start(len); + rc_data = cmp_data_log_rd_phys2_rd(len); + rc_crc = cmp_crc_log_rd_phys2_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_sector_in_page_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, 1); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys2_read_start(len); + rc_data = cmp_data_log_rd_phys2_rd(len); + rc_crc = cmp_crc_log_rd_phys2_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_page_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_PAGE; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, ONE_PAGE / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys2_read_start(len); + rc_data = cmp_data_log_rd_phys2_rd(len); + rc_crc = cmp_crc_log_rd_phys2_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_ten_page_in_one_meg_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, TEN_PAGES / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys2_read_start(len); + rc_data = cmp_data_log_rd_phys2_rd(len); + rc_crc = cmp_crc_log_rd_phys2_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void recover_one_meg_disk2(void) +{ + int rc_data, rc_crc; + size_t len = ONE_MEG; + + init_test(); + phys_fill_buffer(len); + phys1_write_start(len); + phys2_corrupt_and_write_start(len, ONE_MEG / ONE_SECTOR); + flush_disk_buffers(); + log_read_start(len); + flush_disk_buffers(); + phys2_read_start(len); + rc_data = cmp_data_log_rd_phys2_rd(len); + rc_crc = 
cmp_crc_log_rd_phys2_rd(len); + basic_test(rc_data == 0 && rc_crc == 0); + cleanup_test(); +} + +static void dual_error(void) +{ + ssize_t n; + size_t len = ONE_SECTOR; + + init_test(); + phys_fill_buffer(len); + phys1_corrupt_and_write_start(len, 1); + phys2_corrupt_and_write_start(len, 1); + flush_disk_buffers(); + n = log_read_start(len); + basic_test(n <= 0); + cleanup_test(); +} + +struct run_test_t test_array[] = { + { open_logical, "open(" LOGICAL_DISK_NAME ")", 4 }, + { close_logical, "close(" LOGICAL_DISK_NAME ")", 4 }, + { use_after_close_invalid, "use after close is invalid", 4 }, + { lseek_logical, "lseek(" LOGICAL_DISK_NAME ")", 4 }, + { read_one_sector_start, "read one sector from the start", 5 }, + { read_one_sector_middle, "read one sector from the middle", 5 }, + { read_one_sector_end, "read one sector from the end", 5 }, + { write_one_sector_start, "write one sector from the start", 5 }, + { write_one_sector_middle, "write one sector from the middle", 5 }, + { write_one_sector_end, "write one sector from the end", 5 }, + { read_one_page_start, "read one page from the start", 5 }, + { read_one_page_middle, "read one page from the middle", 5 }, + { read_one_page_end, "read one page from the end", 5 }, + { write_one_page_start, "write one page from the start", 5 }, + { write_one_page_middle, "write one page from the middle", 5 }, + { write_one_page_end, "write one page from the end", 5 }, + { read_two_pages_start, "read two pages from the start", 5 }, + { read_two_pages_middle, "read two pages from the middle", 5 }, + { read_two_pages_end, "read two pages from the end", 5 }, + { write_two_pages_start, "write two pages from the start", 5 }, + { write_two_pages_middle, "write two pages from the middle", 5 }, + { write_two_pages_end, "write two pages from the end", 5 }, + { read_one_meg_start, "read 1MB from the start", 5 }, + { read_one_meg_middle, "read 1MB from the middle", 5 }, + { read_one_meg_end, "read 1MB from the end", 5 }, + { 
write_one_meg_start, "write 1MB from the start", 5 }, + { write_one_meg_middle, "write 1MB from the middle", 5 }, + { write_one_meg_end, "write 1MB from the end", 5 }, + { read_boundary_one_sector, "read one sector outside boundary", 7 }, + { read_boundary_one_page, "read one page with contents outside boundary", 7 }, + { read_boundary_two_pages, "read two pages with contents outside boundary", 7 }, + { read_boundary_one_meg, "read 1MB with contents outside boundary", 7 }, + { write_boundary_one_sector, "write one sector outside boundary", 7 }, + { write_boundary_one_page, "write one page with contents outside boundary", 7 }, + { write_boundary_two_pages, "write two pages with contents outside boundary", 7 }, + { write_boundary_one_meg, "write 1MB with contents outside boundary", 7 }, + { memory_is_freed, "check memory is freed", 24 }, + { write_one_sector_check_phys1, "write one sector and check disk1 (no CRC check)", 15 }, + { write_one_page_check_phys1, "write one page and check disk1 (no CRC check)", 15 }, + { write_two_pages_check_phys1, "write two pages and check disk1 (no CRC check)", 15 }, + { write_one_meg_check_phys1, "write 1MB and check disk1 (no CRC check)", 15 }, + { write_one_sector_check_phys, "write one sector and check disks (no CRC check)", 15 }, + { write_one_page_check_phys, "write one page and check disks (no CRC check)", 15 }, + { write_two_pages_check_phys, "write two pages and check disks (no CRC check)", 15 }, + { write_one_meg_check_phys, "write 1MB and check disks (no CRC check)", 15 }, + { read_one_sector_after_write, "read one sector after physical write (correct CRC)", 16 }, + { read_one_page_after_write, "read one page after physical write (correct CRC)", 16 }, + { read_two_pages_after_write, "read two pages after physical write (correct CRC)", 16 }, + { read_one_meg_after_write, "read 1MB after physical write (correct CRC)", 16 }, + { write_one_sector_check_phys1_crc, "write one sector and check disk1 (do CRC check)", 16 }, + { 
write_one_page_check_phys1_crc, "write one page and check disk1 (do CRC check)", 16 }, + { write_two_pages_check_phys1_crc, "write two pages and check disk1 (do CRC check)", 16 }, + { write_one_meg_check_phys1_crc, "write 1MB and check disk1 (do CRC check)", 16 }, + { write_one_sector_check_phys_crc, "write one sector and check disks (do CRC check)", 16 }, + { write_one_page_check_phys_crc, "write one page and check disks (do CRC check)", 16 }, + { write_two_pages_check_phys_crc, "write two pages and check disks (do CRC check)", 16 }, + { write_one_meg_check_phys_crc, "write 1MB and check disks (do CRC check)", 16 }, + { corrupt_read_correct_one_sector_disk1, "read corrected one sector error from disk1", 18 }, + { corrupt_read_correct_one_sector_in_page_disk1, "read corrected one sector in page error from disk1", 18 }, + { corrupt_read_correct_one_page_disk1, "read corrected one page error from disk1", 18 }, + { corrupt_read_correct_ten_pages_in_one_meg_disk1, "read corrected ten pages error in one meg from disk1", 18 }, + { corrupt_read_correct_one_meg_disk1, "read corrected one meg error from disk1", 18 }, + { recover_one_sector_disk1, "recover one sector error from disk1", 18 }, + { recover_one_sector_in_page_disk1, "recover one sector error in one page from disk1", 18 }, + { recover_one_page_disk1, "recover one page filled with errors from disk1", 18 }, + { recover_ten_page_in_one_meg_disk1, "recover ten pages error in 1MB from disk1", 18 }, + { recover_one_meg_disk1, "recover 1MB filled with errors from disk1", 18 }, + { corrupt_read_correct_one_sector_disk2, "read corrected one sector error from disk2", 18 }, + { corrupt_read_correct_one_sector_in_page_disk2, "read corrected one sector in page error from disk2", 18 }, + { corrupt_read_correct_one_page_disk2, "read corrected one page error from disk2", 18 }, + { corrupt_read_correct_ten_pages_in_one_meg_disk2, "read corrected ten pages error in one meg from disk2", 18 }, + { corrupt_read_correct_one_meg_disk2, 
"read corrected one meg error from disk2", 18 }, + { recover_one_sector_disk2, "recover one sector error from disk2", 18 }, + { recover_one_sector_in_page_disk2, "recover one sector error in one page from disk2", 18 }, + { recover_one_page_disk2, "recover one page filled with errors from disk2", 18 }, + { recover_ten_page_in_one_meg_disk2, "recover ten pages error in 1MB from disk2", 18 }, + { recover_one_meg_disk2, "recover 1MB filled with errors from disk2", 18 }, + { dual_error, "signal error when both physical disks are corrupted", 12 }, +}; +size_t max_points = 900; + +/* Return number of tests in test_array. */ +size_t get_num_tests(void) +{ + return sizeof(test_array) / sizeof(test_array[0]); +} diff --git a/tools/labs/templates/assignments/3-raid/ssr.h b/tools/labs/templates/assignments/3-raid/ssr.h new file mode 100644 index 00000000000000..cc68b7d56d1720 --- /dev/null +++ b/tools/labs/templates/assignments/3-raid/ssr.h @@ -0,0 +1,26 @@ +/* + * Simple Software Raid - Linux header file + */ + +#ifndef SSR_H_ +#define SSR_H_ 1 + +#define SSR_MAJOR 240 +#define SSR_FIRST_MINOR 0 +#define SSR_NUM_MINORS 1 + +#define PHYSICAL_DISK1_NAME "/dev/vdb" +#define PHYSICAL_DISK2_NAME "/dev/vdc" + +/* sector size */ +#define KERNEL_SECTOR_SIZE 512 + +/* physical partition size - 95 MB (more than this results in error) */ +#define LOGICAL_DISK_NAME "/dev/ssr" +#define LOGICAL_DISK_SIZE (95 * 1024 * 1024) +#define LOGICAL_DISK_SECTORS ((LOGICAL_DISK_SIZE) / (KERNEL_SECTOR_SIZE)) + +/* sync data */ +#define SSR_IOCTL_SYNC 1 + +#endif diff --git a/tools/labs/templates/assignments/4-stp/Kbuild b/tools/labs/templates/assignments/4-stp/Kbuild new file mode 100644 index 00000000000000..feb82305e7f94a --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g + +obj-m = af_stp.o diff --git a/tools/labs/templates/assignments/4-stp/checker/.gitignore b/tools/labs/templates/assignments/4-stp/checker/.gitignore new file mode 100644 
index 00000000000000..d4b87c0f1dff43 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/.gitignore @@ -0,0 +1 @@ +/stp_test diff --git a/tools/labs/templates/assignments/4-stp/checker/Makefile b/tools/labs/templates/assignments/4-stp/checker/Makefile new file mode 100644 index 00000000000000..e0c2b42807f22f --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/Makefile @@ -0,0 +1,17 @@ +objects = _test/stp_test.o + +.PHONY: all clean _test_subdir_all _test_subdir_clean + +all: stp_test + +stp_test: _test_subdir_all $(objects) + $(CC) -Wall -g -m32 -static $(objects) -Wl,--whole-archive -lpthread -Wl,--no-whole-archive -o $@ + +_test_subdir_all: + make -C _test + +clean: _test_subdir_clean + -rm -f stp_test *~ + +_test_subdir_clean: + make -C _test clean diff --git a/tools/labs/templates/assignments/4-stp/checker/README b/tools/labs/templates/assignments/4-stp/checker/README new file mode 100644 index 00000000000000..a1d04d84f8653e --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/README @@ -0,0 +1,87 @@ +== STP TEST SUITE == + +Test suite for SO2 Transport Protocol + +== FILES == + +README + * this file + +Makefile + +_checker + * script to run all tests defined in _test/stp_test.c + +_test/Makefile + * test suite internal Makefile (creates necessary object files) + +_test/stp_test.c + * test suite for SO2 Transport Protocol + +_test/stp_test.h + * test suite header file + +_test/stp.h + * SO2 Transport Protocol header file (macros and structures) + +_test/test.h + * useful macros for testing + +_test/debug.h + * debugging macros + +_test/util.h + * useful macros for generic use (error processing) + +== BUILDING == + + +== RUNNING == + +Copy your af_stp.ko module and _checker and stp_test +to the fsimg/root directory on your QEMU/KVM virtual machine. + +In order to run the test suite you can either use the _checker +script or run the stp_test executable. 
+ +The _checker script runs all tests and computes the assignment grade: + + ./_checker + +In order to run a specific test, pass the test number (1 .. 32) to the +stp_test executable. + + ./stp_test 5 + +== TESTS == + +Tests are basically unit tests. A single function in the test_fun_array (see +stp_test.c) is called each time the stp_test executable is invoked, +testing a single functionality (and assuming previous tests have been run and +passed). + +The EXIT_IF_FAIL macro (see test.h) is unnecessary since after each test, the +program completes. + +Each test function follows the unit test pattern: initialization, action, +evaluation. The test macro (see test.h) is invoked at the end of each test +for evaluating and grading the test. + +== DEBUGGING == + +The debug.h header file consists of several macros useful for debugging +(dprintf, dlog). There are multiple uses of these macros throughout the above +files. + +In order to turn debug messages on, you must define the DEBUG macro, either in +a header file, or, I suggest, in the Makefile. The LOG_LEVEL macro limits the +log message types that are to be printed, by default LOG_WARNING (see enum in +debug.h). You may redefine it in a header file or in the Makefile. + +Rapid enabling of debug messages is achieved by uncommenting the CPPFLAGS +line in _test/Makefile. It turns on debugging and enables all log messages +(LOG_DEBUG). + +== OTHER == + +srand48() and drand48() are used for generating random numbers. 
diff --git a/tools/labs/templates/assignments/4-stp/checker/_checker b/tools/labs/templates/assignments/4-stp/checker/_checker new file mode 100755 index 00000000000000..a2a588a66df0fa --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_checker @@ -0,0 +1,24 @@ +#!/bin/sh + +first_test=1 +last_test=32 +executable=stp_test + +for i in $(seq $first_test $last_test); do + ./"$executable" $i +done | tee results.txt + +cat results.txt | grep '\[.*\]$' | awk -F '[] /[]+' ' +BEGIN { + sum=0 +} + +{ + sum += $2; +} + +END { + printf "\n%66s [%03d/100]\n", "Total:", sum; +}' + +rm -f results.txt diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/Makefile b/tools/labs/templates/assignments/4-stp/checker/_test/Makefile new file mode 100644 index 00000000000000..d5074dd464a5f5 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/Makefile @@ -0,0 +1,11 @@ +#CPPFLAGS = -DDEBUG -DLOG_LEVEL=LOG_DEBUG +CFLAGS = -Wall -g -m32 + +.PHONY: all clean + +all: stp_test.o + +stp_test.o: stp_test.c stp_test.h stp.h test.h util.h debug.h + +clean: + -rm -f *~ *.o diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/debug.h b/tools/labs/templates/assignments/4-stp/checker/_test/debug.h new file mode 100644 index 00000000000000..a54e9622647181 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/debug.h @@ -0,0 +1,77 @@ +/* + * debugging macros + * heavily inspired by previous work and Internet resources + * + * uses C99 variadic macros + * uses non-standard usage of the token-paste operator (##) for + * removing the comma symbol (,) when not followed by a token + * uses non-standard __FUNCTION__ macro (MSVC doesn't support __func__) + * tested on gcc 4.4.5 and Visual Studio 2008 (9.0), compiler version 15.00 + * + * Razvan Deaconescu, razvan.deaconescu@cs.pub.ro + */ + +#ifndef DEBUG_H_ +#define DEBUG_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* log levels */ +enum { + LOG_EMERG = 1, + 
LOG_ALERT, + LOG_CRIT, + LOG_ERR, + LOG_WARNING, + LOG_NOTICE, + LOG_INFO, + LOG_DEBUG +}; + +/* + * initialize default loglevel (for dlog) + * may be redefined in the including code + */ + +#ifndef LOG_LEVEL +#define LOG_LEVEL LOG_WARNING +#endif + +/* + * define DEBUG macro as a compiler option: + * -DDEBUG for GCC + * /DDEBUG for MSVC + */ + +#if defined DEBUG +#define dprintf(format, ...) \ + fprintf(stderr, " [%s(), %s:%u] " format, \ + __FUNCTION__, __FILE__, __LINE__, \ + ##__VA_ARGS__) +#else +#define dprintf(format, ...) \ + do { \ + } while (0) +#endif + +#if defined DEBUG +#define dlog(level, format, ...) \ + do { \ + if (level <= LOG_LEVEL) \ + dprintf(format, ##__VA_ARGS__); \ + } while (0) +#else +#define dlog(level, format, ...) \ + do { \ + } while (0) +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/stp.h b/tools/labs/templates/assignments/4-stp/checker/_test/stp.h new file mode 100644 index 00000000000000..838f9936bf55c5 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/stp.h @@ -0,0 +1,51 @@ +/* + * SO2 Transport Protocol + */ + +#ifndef STP_H_ +#define STP_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* STP reuses the defines of ancient protocols like Econet and Xerox PUP + * because adding a new protocol would involve patching the kernel, which we + * don't want to do and besides that, they are probably not used anymore. 
+ */ +#define AF_STP 19 +#define PF_STP AF_STP +#define ETH_P_STP 0x0a00 + +struct stp_hdr { + __be16 dst; /* Destination port */ + __be16 src; /* Source port */ + __be16 len; /* Total length, including header */ + __u8 flags; /* */ + __u8 csum; /* xor of all bytes, including header */ +}; + +struct sockaddr_stp { + unsigned short sas_family; /* Always AF_STP */ + int sas_ifindex; /* Interface index */ + __be16 sas_port; /* Port */ + __u8 sas_addr[6]; /* MAC address */ +}; + +/* STP protocol name; used as identifier in /proc/net/protocols */ +#define STP_PROTO_NAME "STP" + +/* + * STP uses proc interface to communicate statistical information to + * user space (in /proc/net/). + */ +#define STP_PROC_NET_FILENAME "stp_stats" +#define STP_PROC_FULL_FILENAME "/proc/net/" STP_PROC_NET_FILENAME + +#ifdef __cplusplus +} +#endif + +#endif /* STP_H_ */ diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.c b/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.c new file mode 100644 index 00000000000000..d6c729e344e634 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.c @@ -0,0 +1,1331 @@ +/* + * SO2 Transport Protocol - test suite + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test.h" +#include "debug.h" +#include "util.h" + +#include "stp.h" +#include "stp_test.h" + +#define SSA struct sockaddr +#define BUFLEN 32 + +/* declared in test.h; used for printing information in test macro */ +int max_points = 100; + +/* values read from STP_PROC_FULL_FILENAME */ +static int rx_pkts, hdr_err, csum_err, no_sock, no_buffs, tx_pkts; + +enum socket_action { + ACTION_SENDTO, + ACTION_SENDMSG, + ACTION_SEND, + ACTION_SENDTO_PING_PONG, + ACTION_SENDMSG_PING_PONG, + ACTION_SEND_PING_PONG, +}; + +/* + * Do initialization for STP test functions. 
+ */ + +static void init_test(void) +{ + system("insmod " MODULE_FILENAME); +} + +/* + * Do cleanup for STP test functions. + */ + +static void cleanup_test(void) +{ + system("rmmod " MODULE_NAME); +} + +/* + * Check for successful module insertion and removal from the kernel. + */ + +static void test_insmod_rmmod(void) +{ + int rc; + + rc = system("insmod " MODULE_FILENAME); + test("test_insmod", rc == 0, 1); + + rc = system("rmmod " MODULE_NAME); + test("test_rmmod", rc == 0, 1); + + rc = system("insmod " MODULE_FILENAME); + test(__FUNCTION__, rc == 0, 1); + + system("rmmod " MODULE_NAME); +} + +/* + * Check /proc/net/protocols for STP protocol. Grep for line starting with + * the string identified by STP_PROTO_NAME. + */ + +static void test_proto_name_exists_after_insmod(void) +{ + int rc; + + init_test(); + + rc = system("grep '^" STP_PROTO_NAME "' /proc/net/protocols > /dev/null 2>&1"); + test(__FUNCTION__, rc == 0, 2); + + cleanup_test(); +} + +/* + * STP entry in /proc/net/protocols is deleted when module is removed. + */ + +static void test_proto_name_inexistent_after_rmmod(void) +{ + int rc; + + init_test(); + cleanup_test(); + + rc = system("grep '^" STP_PROTO_NAME "' /proc/net/protocols > /dev/null 2>&1"); + test(__FUNCTION__, rc != 0, 2); +} + +/* + * Check for proc entry for STP statistics. + */ + +static void test_proc_entry_exists_after_insmod(void) +{ + int rc; + + init_test(); + + rc = access(STP_PROC_FULL_FILENAME, F_OK); + test(__FUNCTION__, rc == 0, 2); + + cleanup_test(); +} + +/* + * STP statistics file in /proc/net/ is deleted when module is removed. + */ + +static void test_proc_entry_inexistent_after_rmmod(void) +{ + int rc; + + init_test(); + cleanup_test(); + + rc = access(STP_PROC_FULL_FILENAME, F_OK); /* not file(1): it exits 0 on a missing path */ + test(__FUNCTION__, rc != 0, 2); +} + +/* + * Call socket(2) with proper arguments for creating an AF_STP socket. 
+ */ + +static void test_socket(void) +{ + int s; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + test(__FUNCTION__, s > 0, 5); + + close(s); + cleanup_test(); +} + +/* + * Create two AF_STP sockets using socket(2). + */ + +static void test_two_sockets(void) +{ + int s1, s2; + + init_test(); + + s1 = socket(AF_STP, SOCK_DGRAM, 0); + s2 = socket(AF_STP, SOCK_DGRAM, 0); + test(__FUNCTION__, s1 > 0 && s2 > 0 && s1 != s2, 2); + + close(s1); + close(s2); + cleanup_test(); +} + +/* + * Pass bad socket type argument to socket(2) (second argument). + * Call should fail. + */ + +static void test_socket_bad_socket_type(void) +{ + int s; + + init_test(); + + s = socket(AF_STP, SOCK_STREAM, 0); + test(__FUNCTION__, s < 0, 1); + + close(s); + cleanup_test(); +} + +/* + * Pass bad protocol argument to socket(2) (third argument). + * Call should fail. + */ + +static void test_socket_bad_protocol(void) +{ + int s; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, IPPROTO_TCP); + test(__FUNCTION__, s < 0, 1); + + close(s); + cleanup_test(); +} + +/* + * Close open socket using close(2). + */ + +static void test_close(void) +{ + int s; + int rc; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + rc = close(s); + test(__FUNCTION__, rc == 0, 2); + + cleanup_test(); +} + +/* + * Pass closed socket descriptor to close(2). Call should fail. + */ + +static void test_close_closed_socket(void) +{ + int s; + int rc; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + close(s); + rc = close(s); + + test(__FUNCTION__, rc < 0, 2); + + cleanup_test(); +} + +/* + * Bind socket to proper address. Use "all" interface. 
+ */ + +static void test_bind(void) +{ + int s; + int rc; + struct sockaddr_stp sas; + const unsigned short port = 12345; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = 0; + rc = bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc == 0, 5); + + close(s); + cleanup_test(); +} + +/* + * Bind socket to proper address. Use "eth0" interface. + */ + +static void test_bind_eth0(void) +{ + int s; + int rc; + struct sockaddr_stp sas; + const unsigned short port = 12345; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = if_nametoindex("eth0"); + rc = bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc == 0, 2); + + close(s); + cleanup_test(); +} + +/* + * Use bind(2) on two AF_STP sockets. + */ + +static void test_two_binds(void) +{ + int s1, s2; + int rc1, rc2; + struct sockaddr_stp sas1, sas2; + const unsigned short port1 = 12345, port2 = 54321; + + init_test(); + + s1 = socket(AF_STP, SOCK_DGRAM, 0); + + sas1.sas_family = AF_STP; + sas1.sas_port = htons(port1); + sas1.sas_ifindex = 0; + rc1 = bind(s1, (struct sockaddr *) &sas1, sizeof(struct sockaddr_stp)); + + s2 = socket(AF_STP, SOCK_DGRAM, 0); + + sas2.sas_family = AF_STP; + sas2.sas_port = htons(port2); + sas2.sas_ifindex = 0; + rc2 = bind(s2, (struct sockaddr *) &sas2, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc1 == 0 && rc2 == 0, 2); + + close(s1); close(s2); + cleanup_test(); +} + +/* + * Pass bad address to bind(2) (second argument). + * Call should fail. 
+ */ + +static void test_bind_bad_address(void) +{ + int s; + int rc; + struct sockaddr_stp sas; + const unsigned short port = 12345; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_INET; /* invalid */ + sas.sas_port = htons(port); + sas.sas_ifindex = 0; + rc = bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc != 0, 1); + + close(s); + cleanup_test(); +} + +/* + * Use bind(2) on two AF_STP sockets using same port and "all" interface. + * Call should fail. + */ + +static void test_two_binds_same_if(void) +{ + int s1, s2; + int rc1, rc2; + struct sockaddr_stp sas1, sas2; + const unsigned short port = 12345; + + init_test(); + + s1 = socket(AF_STP, SOCK_DGRAM, 0); + + sas1.sas_family = AF_STP; + sas1.sas_port = htons(port); + sas1.sas_ifindex = 0; + rc1 = bind(s1, (struct sockaddr *) &sas1, sizeof(struct sockaddr_stp)); + + s2 = socket(AF_STP, SOCK_DGRAM, 0); + + sas2.sas_family = AF_STP; + sas2.sas_port = htons(port); + sas2.sas_ifindex = 0; + rc2 = bind(s2, (struct sockaddr *) &sas2, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc1 == 0 && rc2 < 0, 2); + + close(s1); close(s2); + cleanup_test(); +} + +/* + * Use bind(2) on two AF_STP sockets using same port and same interface. + * Call should fail. 
+ */ + +static void test_two_binds_same_if_eth0(void) +{ + int s1, s2; + int rc1, rc2; + struct sockaddr_stp sas1, sas2; + const unsigned short port = 12345; + + init_test(); + + s1 = socket(AF_STP, SOCK_DGRAM, 0); + + sas1.sas_family = AF_STP; + sas1.sas_port = htons(port); + sas1.sas_ifindex = if_nametoindex("eth0"); + rc1 = bind(s1, (struct sockaddr *) &sas1, sizeof(struct sockaddr_stp)); + + s2 = socket(AF_STP, SOCK_DGRAM, 0); + + sas2.sas_family = AF_STP; + sas2.sas_port = htons(port); + sas2.sas_ifindex = if_nametoindex("eth0"); + rc2 = bind(s2, (struct sockaddr *) &sas2, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc1 == 0 && rc2 < 0, 2); + + close(s1); close(s2); + cleanup_test(); +} + +/* + * Use bind(2) on two AF_STP sockets using same port and "all" interface and + * "eth0". + * Call should fail. + */ + +static void test_two_binds_same_if_all_eth0(void) +{ + int s1, s2; + int rc1, rc2; + struct sockaddr_stp sas1, sas2; + const unsigned short port = 12345; + + init_test(); + + s1 = socket(AF_STP, SOCK_DGRAM, 0); + + sas1.sas_family = AF_STP; + sas1.sas_port = htons(port); + sas1.sas_ifindex = 0; + rc1 = bind(s1, (struct sockaddr *) &sas1, sizeof(struct sockaddr_stp)); + + s2 = socket(AF_STP, SOCK_DGRAM, 0); + + sas2.sas_family = AF_STP; + sas2.sas_port = htons(port); + sas2.sas_ifindex = if_nametoindex("eth0"); + rc2 = bind(s2, (struct sockaddr *) &sas2, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc1 == 0 && rc2 < 0, 2); + + close(s1); close(s2); + cleanup_test(); +} + +/* + * Use bind(2) on two AF_STP sockets using same port and "eth0" interface and + * "all". + * Call should fail. 
+ */ + +static void test_two_binds_same_if_eth0_all(void) +{ + int s1, s2; + int rc1, rc2; + struct sockaddr_stp sas1, sas2; + const unsigned short port = 12345; + + init_test(); + + s1 = socket(AF_STP, SOCK_DGRAM, 0); + + sas1.sas_family = AF_STP; + sas1.sas_port = htons(port); + sas1.sas_ifindex = if_nametoindex("eth0"); + rc1 = bind(s1, (struct sockaddr *) &sas1, sizeof(struct sockaddr_stp)); + + s2 = socket(AF_STP, SOCK_DGRAM, 0); + + sas2.sas_family = AF_STP; + sas2.sas_port = htons(port); + sas2.sas_ifindex = 0; + rc2 = bind(s2, (struct sockaddr *) &sas2, sizeof(struct sockaddr_stp)); + + test(__FUNCTION__, rc1 == 0 && rc2 < 0, 2); + + close(s1); close(s2); + cleanup_test(); +} + +static ssize_t sendto_message(int sockfd, struct sockaddr_stp *sas, + char *buf, size_t len) +{ + return sendto(sockfd, buf, len, 0, (SSA *) sas, sizeof(*sas)); +} + +static ssize_t sendmsg_message(int sockfd, struct sockaddr_stp *sas, + char *buf, size_t len) +{ + struct iovec iov; + struct msghdr msg; + + iov.iov_base = buf; + iov.iov_len = len; + msg.msg_name = sas; + msg.msg_namelen = sizeof(*sas); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + return sendmsg(sockfd, &msg, 0); +} + +static ssize_t send_message(int sockfd, char *buf, size_t len) +{ + return send(sockfd, buf, len, 0); +} + +/* + * Use recvfrom(2) to receive message. We don't care what is the source + * address of the message. + */ + +static ssize_t recvfrom_message(int sockfd, char *buf, size_t len) +{ + dprintf("ready to receive using recvfrom\n"); + return recvfrom(sockfd, buf, len, 0, NULL, NULL); +} + +/* + * Use recvmsg(2) to receive message. We don't care what is the source + * address of the message. 
+ */ + +static ssize_t recvmsg_message(int sockfd, char *buf, size_t len) +{ + struct iovec iov; + struct msghdr msg; + + iov.iov_base = buf; + iov.iov_len = len; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + return recvmsg(sockfd, &msg, 0); +} + +/* + * Use recv(2) to receive message (no source address is returned). + */ + +static ssize_t recv_message(int sockfd, char *buf, size_t len) +{ + dprintf("ready to receive using recv\n"); + return recv(sockfd, buf, len, 0); +} + +/* + * Use sendto(2) on a socket bound to "lo"; passes if the call does not fail. + */ + +static void test_sendto(void) +{ + int s; + int rc; + struct sockaddr_stp sas, remote_sas; + const unsigned short port = 12345, remote_port = 54321; + char bufout[BUFLEN] = DEFAULT_SENDER_MESSAGE; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = if_nametoindex("lo"); + bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + remote_sas.sas_family = AF_STP; + remote_sas.sas_port = htons(remote_port); + remote_sas.sas_ifindex = 0; + memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"), + sizeof(remote_sas.sas_addr)); + + rc = sendto_message(s, &remote_sas, bufout, BUFLEN); + + test(__FUNCTION__, rc >= 0, 5); + + close(s); + cleanup_test(); +} + +/* + * Use sendmsg(2) on a socket. 
+ */ + +static void test_sendmsg(void) +{ + int s; + int rc; + struct sockaddr_stp sas, remote_sas; + const unsigned short port = 12345, remote_port = 54321; + char bufout[BUFLEN] = DEFAULT_SENDER_MESSAGE; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = if_nametoindex("lo"); + bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + remote_sas.sas_family = AF_STP; + remote_sas.sas_port = htons(remote_port); + remote_sas.sas_ifindex = 0; + memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"), + sizeof(remote_sas.sas_addr)); + + rc = sendmsg_message(s, &remote_sas, bufout, BUFLEN); + + test(__FUNCTION__, rc >= 0, 3); + + close(s); + cleanup_test(); +} + +/* + * Connect local socket to remote AF_STP socket. + */ + +static void test_connect(void) +{ + int s; + int rc; + struct sockaddr_stp sas, remote_sas; + const unsigned short port = 12345, remote_port = 54321; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = if_nametoindex("lo"); + bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + remote_sas.sas_family = AF_STP; + remote_sas.sas_port = htons(remote_port); + remote_sas.sas_ifindex = 0; + memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"), + sizeof(remote_sas.sas_addr)); + + rc = connect(s, (struct sockaddr *) &remote_sas, sizeof(remote_sas)); + + test(__FUNCTION__, rc >= 0, 5); + + close(s); + cleanup_test(); +} + +/* + * Use send(2) on a connected socket. 
+ */ + +static void test_send(void) +{ + int s; + int rc; + struct sockaddr_stp sas, remote_sas; + const unsigned short port = 12345, remote_port = 54321; + char bufout[BUFLEN] = DEFAULT_SENDER_MESSAGE; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = if_nametoindex("lo"); + bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + remote_sas.sas_family = AF_STP; + remote_sas.sas_port = htons(remote_port); + remote_sas.sas_ifindex = 0; + memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"), + sizeof(remote_sas.sas_addr)); + + rc = connect(s, (SSA *) &remote_sas, sizeof(remote_sas)); + assert(rc == 0); + + rc = send_message(s, bufout, BUFLEN); + + test(__FUNCTION__, rc >= 0, 5); + + close(s); + cleanup_test(); +} + +/* + * Read the six counters from STP_PROC_FULL_FILENAME; 0 on success, -1 on error. + */ + +static int stp_proc_read_values(void) +{ + char buffer[256]; + FILE *f; + int n = 0; + + f = fopen(STP_PROC_FULL_FILENAME, "rt"); + if (f == NULL) + return -1; + + /* skip the column line, then read the values line (all six must parse) */ + if (fgets(buffer, sizeof(buffer), f) != NULL) + n = fscanf(f, "%d %d %d %d %d %d", + &rx_pkts, &hdr_err, &csum_err, &no_sock, &no_buffs, &tx_pkts); + + dprintf("read: %d %d %d %d %d %d\n", + rx_pkts, hdr_err, csum_err, no_sock, no_buffs, tx_pkts); + + fclose(f); + + return n == 6 ? 0 : -1; +} + +/* + * Send packet updates TxPkts column in STP_PROC_FULL_FILENAME. + * Expected value is 1. 
+ */ + +static void test_stat_tx(void) +{ + int s; + int rc; + struct sockaddr_stp sas, remote_sas; + const unsigned short port = 12345, remote_port = 54321; + char bufout[BUFLEN] = DEFAULT_SENDER_MESSAGE; + + init_test(); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = if_nametoindex("lo"); + bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + remote_sas.sas_family = AF_STP; + remote_sas.sas_port = htons(remote_port); + remote_sas.sas_ifindex = 0; + memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"), + sizeof(remote_sas.sas_addr)); + + rc = connect(s, (SSA *) &remote_sas, sizeof(remote_sas)); + assert(rc == 0); + + send_message(s, bufout, BUFLEN); + + close(s); + + stp_proc_read_values(); + + test(__FUNCTION__, tx_pkts == 1, 3); + + cleanup_test(); +} + +/* + * Start sender process; the parent gets the child's pid, the child exits. + * + * action switches between sendto(2), sendmsg(2), send(2) and whether + * to do ping_pong or not. + */ + +static pid_t start_sender(enum socket_action action) +{ + pid_t pid; + int s; + struct sockaddr_stp sas, remote_sas; + const unsigned short port = 12345, remote_port = 54321; + char bufin[BUFLEN]; + char bufout[BUFLEN] = DEFAULT_SENDER_MESSAGE; + ssize_t bytes_recv = 0, bytes_sent = 0; + sem_t *sem; + + /* set bufin to 0 for testing purposes (it should be overwritten) */ + memset(bufin, 0, BUFLEN); + + pid = fork(); + DIE(pid < 0, "fork"); + + switch (pid) { + case 0: /* child process */ + break; + + default: /* parent process */ + return pid; + } + + /* only child process (sender) is running */ + + sem = sem_open(SEM_NAME_SENDER, 0); + if (sem == SEM_FAILED) + exit(EXIT_FAILURE); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = if_nametoindex("lo"); + bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + remote_sas.sas_family = AF_STP; + remote_sas.sas_port = htons(remote_port); + 
remote_sas.sas_ifindex = 0; + memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"), + sizeof(remote_sas.sas_addr)); + if (action == ACTION_SEND || action == ACTION_SEND_PING_PONG) { + int rc; + + rc = connect(s, (SSA *) &remote_sas, sizeof(remote_sas)); + assert(rc == 0); + } + + switch (action) { + case ACTION_SENDTO: + case ACTION_SENDTO_PING_PONG: + bytes_sent = sendto_message(s, &remote_sas, bufout, BUFLEN); + dprintf("sent %s\n", bufout); + break; + + case ACTION_SENDMSG: + case ACTION_SENDMSG_PING_PONG: + bytes_sent = sendmsg_message(s, &remote_sas, bufout, BUFLEN); + dprintf("sent %s\n", bufout); + break; + + case ACTION_SEND: + case ACTION_SEND_PING_PONG: + bytes_sent = send_message(s, bufout, BUFLEN); + dprintf("sent %s\n", bufout); + break; + + default: + break; + } + + switch (action) { + case ACTION_SENDTO_PING_PONG: + bytes_recv = recvfrom_message(s, bufin, BUFLEN); + dprintf("received %s\n", bufin); + break; + case ACTION_SENDMSG_PING_PONG: + bytes_recv = recvmsg_message(s, bufin, BUFLEN); + dprintf("received %s\n", bufin); + break; + case ACTION_SEND_PING_PONG: + bytes_recv = recv_message(s, bufin, BUFLEN); + dprintf("received %s\n", bufin); + break; + default: + break; + } + + /* Let the parent know we're done. */ + sem_post(sem); + + /* exit with EXIT_SUCCESS in case of successful communication */ + switch (action) { + case ACTION_SENDTO: + case ACTION_SEND: + case ACTION_SENDMSG: + if (bytes_sent > 0) + exit(EXIT_SUCCESS); + break; + + case ACTION_SENDMSG_PING_PONG: + case ACTION_SENDTO_PING_PONG: + case ACTION_SEND_PING_PONG: + dprintf("(ping_pong) bytes_sent: %zd, bytes_recv: %zd, strcmp: %d\n", + bytes_sent, bytes_recv, strcmp(bufin, DEFAULT_RECEIVER_MESSAGE)); + dprintf("bufin: #%s#, bufout: #%s#\n", bufin, bufout); + if (bytes_sent > 0 && bytes_recv > 0 && + strcmp(bufin, DEFAULT_RECEIVER_MESSAGE) == 0) + exit(EXIT_SUCCESS); + break; + } + + exit(EXIT_FAILURE); + + /* is not reached */ + return 0; +} + +/* + * Start receiver process. 
+ * + * action switches between sendto(2), sendmsg(2), send(2) and whether + * to do ping_pong or not. + */ + +static pid_t start_receiver(enum socket_action action) +{ + pid_t pid; + int s; + struct sockaddr_stp sas, remote_sas; + const unsigned short port = 54321, remote_port = 12345; + char bufin[BUFLEN]; + char bufout[BUFLEN] = DEFAULT_RECEIVER_MESSAGE; + ssize_t bytes_recv = 0, bytes_sent = 0; + sem_t *sem; + + /* set bufin to 0 for testing purposes (it should be overwritten) */ + memset(bufin, 0, BUFLEN); + + pid = fork(); + DIE(pid < 0, "fork"); + + switch (pid) { + case 0: /* child process */ + break; + + default: /* parent process */ + return pid; + } + + /* only child process (receiver) is running */ + + sem = sem_open(SEM_NAME_RECEIVER, 0); + if (sem == SEM_FAILED) + exit(EXIT_FAILURE); + + s = socket(AF_STP, SOCK_DGRAM, 0); + + sas.sas_family = AF_STP; + sas.sas_port = htons(port); + sas.sas_ifindex = if_nametoindex("lo"); + bind(s, (struct sockaddr *) &sas, sizeof(struct sockaddr_stp)); + + remote_sas.sas_family = AF_STP; + remote_sas.sas_port = htons(remote_port); + remote_sas.sas_ifindex = 0; + memcpy(remote_sas.sas_addr, ether_aton("00:00:00:00:00:00"), + sizeof(remote_sas.sas_addr)); + + if (action == ACTION_SEND || action == ACTION_SEND_PING_PONG) { + int rc; + + rc = connect(s, (SSA *) &remote_sas, sizeof(remote_sas)); + assert(rc == 0); + dprintf("connected\n"); + } + + /* We're set up, let the parent know. 
*/ + sem_post(sem); + + switch (action) { + case ACTION_SENDTO: + case ACTION_SENDTO_PING_PONG: + bytes_recv = recvfrom_message(s, bufin, BUFLEN); + dprintf("received %s\n", bufin); + break; + + case ACTION_SENDMSG: + case ACTION_SENDMSG_PING_PONG: + bytes_recv = recvmsg_message(s, bufin, BUFLEN); + dprintf("received %s\n", bufin); + break; + + case ACTION_SEND: + case ACTION_SEND_PING_PONG: + bytes_recv = recv_message(s, bufin, BUFLEN); + dprintf("received %s\n", bufin); + break; + + default: + break; + } + + switch (action) { + case ACTION_SENDTO_PING_PONG: + bytes_sent = sendto_message(s, &remote_sas, bufout, BUFLEN); + dprintf("sent %s\n", bufout); + break; + case ACTION_SENDMSG_PING_PONG: + bytes_sent = sendmsg_message(s, &remote_sas, bufout, BUFLEN); + dprintf("sent %s\n", bufout); + break; + case ACTION_SEND_PING_PONG: + bytes_sent = send_message(s, bufout, BUFLEN); + dprintf("sent %s\n", bufout); + break; + default: + break; + } + + /* Let the parent know we're done (second post on the same semaphore). */ + sem_post(sem); + + /* exit with EXIT_SUCCESS in case of successful communication */ + switch (action) { + case ACTION_SENDTO: + case ACTION_SEND: + case ACTION_SENDMSG: + if (bytes_recv > 0) + exit(EXIT_SUCCESS); + break; + + case ACTION_SENDMSG_PING_PONG: + case ACTION_SENDTO_PING_PONG: + case ACTION_SEND_PING_PONG: + dprintf("(ping_pong) bytes_sent: %zd, bytes_recv: %zd\n", + bytes_sent, bytes_recv); + dprintf("bufin: #%s#, bufout: #%s#\n", bufin, bufout); + if (bytes_recv > 0 && bytes_sent > 0 && + strcmp(bufin, DEFAULT_SENDER_MESSAGE) == 0) + exit(EXIT_SUCCESS); + break; + } + + exit(EXIT_FAILURE); + + /* is not reached */ + return 0; +} + +int wait_for_semaphore(sem_t *sem, unsigned int secs) +{ + struct timespec ts; + int ret; + + ret = clock_gettime(CLOCK_REALTIME, &ts); + assert(ret == 0); + + ts.tv_sec += secs; + + ret = sem_timedwait(sem, &ts); + return ret; +} + +/* + * Wrapper call for running a sender and a receiver process. 
+ * + * action switches between sendto(2), sendmsg(2), send(2) and whether + * to do ping_pong or not. + * + * Returns boolean value: 1 in case of successful run, 0 otherwise. + */ + +static int run_sender_receiver(enum socket_action action) +{ + pid_t pid_r = 0, pid_s = 0; + int rc1, rc2, ret; + int status1, status2; + sem_t *sem_r, *sem_s; + + /* Create two named semaphores (initial value 0) used to communicate + * with the child processes + */ + sem_r = sem_open(SEM_NAME_RECEIVER, O_CREAT, (mode_t)0644, 0); + assert(sem_r != SEM_FAILED); + sem_s = sem_open(SEM_NAME_SENDER, O_CREAT, (mode_t)0644, 0); + assert(sem_s != SEM_FAILED); + + /* start the receiver */ + pid_r = start_receiver(action); + assert(pid_r > 0); + /* wait (at most RECV_TIMEOUT seconds) for it to bind */ + wait_for_semaphore(sem_r, RECV_TIMEOUT); + + /* Receiver is set up, start the sender now. */ + pid_s = start_sender(action); + assert(pid_s > 0); + + /* Wait for both to finish; after a semaphore timeout only poll (WNOHANG). */ + rc1 = wait_for_semaphore(sem_r, SENDRECV_TIMEOUT); + ret = waitpid(pid_r, &status1, rc1 ? WNOHANG : 0); + assert(ret >= 0); + kill(pid_r, SIGTERM); kill(pid_r, SIGKILL); + + rc2 = wait_for_semaphore(sem_s, SENDRECV_TIMEOUT); + ret = waitpid(pid_s, &status2, rc2 ? WNOHANG : 0); + assert(ret >= 0); + kill(pid_s, SIGTERM); kill(pid_s, SIGKILL); + + sem_close(sem_r); sem_unlink(SEM_NAME_RECEIVER); + sem_close(sem_s); sem_unlink(SEM_NAME_SENDER); + + return !rc1 && !rc2 && + WIFEXITED(status1) && WEXITSTATUS(status1) == EXIT_SUCCESS && + WIFEXITED(status2) && WEXITSTATUS(status2) == EXIT_SUCCESS; +} + +/* + * Send a datagram on one end and receive it on the other end. + * Use sendto(2) and recvfrom(2). + */ + +static void test_sendto_recvfrom(void) +{ + int rc; + + init_test(); + + rc = run_sender_receiver(ACTION_SENDTO); + + test(__FUNCTION__, rc != 0, 10); + + cleanup_test(); +} + +/* + * Send and receive packet updates RxPkts and TxPkts columns in + * STP_PROC_FULL_FILENAME. Expected values are 1, 1. 
+ */ + +static void test_stat_tx_rx(void) +{ + init_test(); + + run_sender_receiver(ACTION_SENDTO); + + stp_proc_read_values(); + + test(__FUNCTION__, tx_pkts == 1 && rx_pkts == 1, 3); + + cleanup_test(); +} + +/* + * Send a packet and then wait for a reply. + */ + +static void test_sendto_recvfrom_ping_pong(void) +{ + int rc; + + init_test(); + + rc = run_sender_receiver(ACTION_SENDTO_PING_PONG); + + test(__FUNCTION__, rc != 0, 5); + + cleanup_test(); +} + +/* + * Send and receive ping pong updates RxPkts and TxPkts column in + * STP_PROC_FULL_FILENAME. Expected values are 2, 2. + */ + +static void test_stat_tx_rx_ping_pong(void) +{ + init_test(); + + run_sender_receiver(ACTION_SENDTO_PING_PONG); + + stp_proc_read_values(); + stp_proc_read_values(); + + test(__FUNCTION__, tx_pkts == 2 && rx_pkts == 2, 3); + + cleanup_test(); +} + +/* + * Send a datagram on one end and receive it on the other end. + * Use sendmsg(2) and recvmsg(2). + */ + +static void test_sendmsg_recvmsg(void) +{ + int rc; + + init_test(); + + rc = run_sender_receiver(ACTION_SENDMSG); + + test(__FUNCTION__, rc != 0, 5); + + cleanup_test(); +} + +/* + * Send a packet and then wait for a reply. + */ + +static void test_sendmsg_recvmsg_ping_pong(void) +{ + int rc; + + init_test(); + + rc = run_sender_receiver(ACTION_SENDMSG_PING_PONG); + + test(__FUNCTION__, rc != 0, 3); + + cleanup_test(); +} + +/* + * Send a packet on one end and receive it on the other end. + * Use send(2) and recv(2). + */ + +static void test_send_receive(void) +{ + int rc; + + init_test(); + + rc = run_sender_receiver(ACTION_SEND); + + test(__FUNCTION__, rc != 0, 5); + + cleanup_test(); +} + +/* + * Send a packet and then wait for a reply. 
+ */ + +static void test_send_receive_ping_pong(void) +{ + int rc; + + init_test(); + + rc = run_sender_receiver(ACTION_SEND_PING_PONG); + + test(__FUNCTION__, rc != 0, 3); + + cleanup_test(); +} + +static void (*test_fun_array[])(void) = { + NULL, + test_insmod_rmmod, + test_proto_name_exists_after_insmod, + test_proto_name_inexistent_after_rmmod, + test_proc_entry_exists_after_insmod, + test_proc_entry_inexistent_after_rmmod, + test_socket, + test_two_sockets, + test_socket_bad_socket_type, + test_socket_bad_protocol, + test_close, + test_close_closed_socket, + test_bind, + test_bind_eth0, + test_two_binds, + test_bind_bad_address, + test_two_binds_same_if, + test_two_binds_same_if_eth0, + test_two_binds_same_if_all_eth0, + test_two_binds_same_if_eth0_all, + test_sendto, + test_sendmsg, + test_connect, + test_send, + test_stat_tx, + test_sendto_recvfrom, + test_stat_tx_rx, + test_sendto_recvfrom_ping_pong, + test_stat_tx_rx_ping_pong, + test_sendmsg_recvmsg, + test_sendmsg_recvmsg_ping_pong, + test_send_receive, + test_send_receive_ping_pong, +}; + +/* + * Usage message for invalid executable call. 
+ */ + +static void usage(const char *argv0) +{ + fprintf(stderr, "Usage: %s test_no\n\n", argv0); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + int test_idx; + + if (argc != 2) + usage(argv[0]); + + test_idx = atoi(argv[1]); + + if (test_idx < 1 || + test_idx >= sizeof(test_fun_array)/sizeof(test_fun_array[0])) { + fprintf(stderr, "Error: test index %d is out of bounds\n", + test_idx); + exit(EXIT_FAILURE); + } + + srand(time(NULL)); + srand48(time(NULL)); + test_fun_array[test_idx](); + + return 0; +} diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.h b/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.h new file mode 100644 index 00000000000000..fb708433c0269f --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/stp_test.h @@ -0,0 +1,31 @@ +/* + * SO2 Transport Protocol - test suite specific header + */ + +#ifndef STP_TEST_H_ +#define STP_TEST_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* STP test suite macros and structures */ +#define MODULE_NAME "af_stp" +#define MODULE_FILENAME MODULE_NAME ".ko" + +#define SEM_NAME_RECEIVER "/receiver_sem" +#define SEM_NAME_SENDER "/sender_sem" + +/* timeouts waiting for receiver/sender */ +#define RECV_TIMEOUT 1 +#define SENDRECV_TIMEOUT 3 + +/* messages used for "ping-pong" between sender and receiver */ +#define DEFAULT_SENDER_MESSAGE "You called down the thunder" +#define DEFAULT_RECEIVER_MESSAGE "now reap the whirlwind" + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/test.h b/tools/labs/templates/assignments/4-stp/checker/_test/test.h new file mode 100644 index 00000000000000..4bcafad9c7d0f5 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/test.h @@ -0,0 +1,63 @@ +/* + * generic test suite + * + * test macros and headers + */ + +#ifndef TEST_H_ +#define TEST_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* to be defined by calling 
program */ +extern int max_points; + +/* + * uncomment EXIT_IF_FAIL macro in order to stop test execution + * at first failed test + */ + +/*#define EXIT_IF_FAIL 1*/ + +#if defined(EXIT_IF_FAIL) +#define test_do_fail(points) \ + do { \ + printf("failed\n"); \ + exit(EXIT_FAILURE); \ + } while (0) +#else +#define test_do_fail(points) \ + printf("failed [ 0/%3d]\n", max_points) +#endif + +#define test_do_pass(points) \ + printf("passed [%3d/%3d]\n", points, max_points) + +#define test(message, test, points) \ + do { \ + size_t i; \ + int t = (test); \ + \ + printf("%s", message); \ + fflush(stdout); \ + \ + for (i = strlen(message); i < 60; i++) \ + putchar('.'); \ + \ + if (!t) \ + test_do_fail(points); \ + else \ + test_do_pass(points); \ + \ + fflush(stdout); \ + } while (0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/4-stp/checker/_test/util.h b/tools/labs/templates/assignments/4-stp/checker/_test/util.h new file mode 100644 index 00000000000000..f06cb833b99635 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/checker/_test/util.h @@ -0,0 +1,69 @@ +/* + * useful structures/macros + */ + +#ifndef UTIL_H_ +#define UTIL_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#if defined(_WIN32) + +#include + +static VOID PrintLastError(const PCHAR message) +{ + CHAR errBuff[1024]; + + FormatMessage( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, + GetLastError(), + 0, + errBuff, + sizeof(errBuff) - 1, + NULL); + + fprintf(stderr, "%s: %s\n", message, errBuff); +} + +#define ERR(call_description) \ + do { \ + fprintf(stderr, "(%s, %d): ", \ + __FILE__, __LINE__); \ + PrintLastError(call_description); \ + } while (0) + +#elif defined(__linux__) + +/* error printing macro */ +#define ERR(call_description) \ + do { \ + fprintf(stderr, "(%s, %d): ", \ + __FILE__, __LINE__); \ + perror(call_description); \ + } while (0) + +#else + #error "Unknown platform" +#endif + +/* 
print error (call ERR) and exit */ +#define DIE(assertion, call_description) \ + do { \ + if (assertion) { \ + ERR(call_description); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/labs/templates/assignments/4-stp/stp.h b/tools/labs/templates/assignments/4-stp/stp.h new file mode 100644 index 00000000000000..838f9936bf55c5 --- /dev/null +++ b/tools/labs/templates/assignments/4-stp/stp.h @@ -0,0 +1,51 @@ +/* + * SO2 Transport Protocol + */ + +#ifndef STP_H_ +#define STP_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* STP reuses the defines of ancient protocols like Econet and Xerox PUP + * because adding a new protocol would involve patching the kernel, which we + * don't want to do and besides that, they are probably not used anymore. + */ +#define AF_STP 19 +#define PF_STP AF_STP +#define ETH_P_STP 0x0a00 + +struct stp_hdr { + __be16 dst; /* Destination port */ + __be16 src; /* Source port */ + __be16 len; /* Total length, including header */ + __u8 flags; /* */ + __u8 csum; /* xor of all bytes, including header */ +}; + +struct sockaddr_stp { + unsigned short sas_family; /* Always AF_STP */ + int sas_ifindex; /* Interface index */ + __be16 sas_port; /* Port */ + __u8 sas_addr[6]; /* MAC address */ +}; + +/* STP protocol name; used as identifier in /proc/net/protocols */ +#define STP_PROTO_NAME "STP" + +/* + * STP uses proc interface to communicate statistical information to + * user space (in /proc/net/). 
+ */ +#define STP_PROC_NET_FILENAME "stp_stats" +#define STP_PROC_FULL_FILENAME "/proc/net/" STP_PROC_NET_FILENAME + +#ifdef __cplusplus +} +#endif + +#endif /* STP_H_ */ diff --git a/tools/labs/templates/assignments/5-pitix/checker/Makefile b/tools/labs/templates/assignments/5-pitix/checker/Makefile new file mode 100644 index 00000000000000..e0f7e2f32f7e07 --- /dev/null +++ b/tools/labs/templates/assignments/5-pitix/checker/Makefile @@ -0,0 +1,19 @@ +# +# pitix filesystem - test Makefile +# + +CFLAGS = -Wall -Wextra -g -m32 +LDFLAGS = -static -m32 + +.PHONY: all clean + +all: mkfs.pitix + ln -sf _test/mkfs.pitix mkfs.pitix + +mkfs.pitix: _test/ + $(MAKE) -C _test/ + +clean: + -rm -f *~ + -rm -f mkfs.pitix + $(MAKE) -C _test/ clean diff --git a/tools/labs/templates/assignments/5-pitix/checker/README b/tools/labs/templates/assignments/5-pitix/checker/README new file mode 100644 index 00000000000000..6c28010cac375f --- /dev/null +++ b/tools/labs/templates/assignments/5-pitix/checker/README @@ -0,0 +1,46 @@ +== PITIX FS TEST SUITE == + +Test suite for PITIX FS + +== FILES == + +README + * this file + +Makefile.checker + * Makefile for automating the build process + +_checker + * script to run all tests + +_test/mkfs.pitix.c + * tool for formatting a device to PITIX FS + +pitix.loop.gz + * image to be mounted by tests + +pitix.files.tar.gz + * files to be verified by tests + +create_pitix_loop.sh + * script to create pitix.loop.gz from pitix.files.tar.gz + * needs fully functional pitix.ko + * should be run in QEMU/KVM virtual machine + +== BUILDING == + +Use the Makefile to properly build the mkfs.pitix executable: + + make -f Makefile.checker + +== RUNNING == + +Copy your pitix.ko module and _checker, pitix.files.tar.gz, pitix.loop.gz and +mkfs.pitix to fsimg/root directory on your QEMU/KVM virtual machine. + +In order to run the test suite you can use the _checker script. 
#
# Print "<start><dots><end>" padded with dots to 72 columns, followed by an
# optional detail line.
#
# $1 - start string
# $2 - end string
# $3 - extra string (printed on its own line when non-empty)
pretty_print()
{
	start_string="$1"
	end_string="$2"
	extra_string="$3"

	# number of filler dots; may be zero or negative for long strings
	n_dots=$((72 - ${#start_string} - ${#end_string}))

	dot_string=""
	local_i=0
	while [ "$local_i" -lt "$n_dots" ]; do
		dot_string="${dot_string}."
		local_i=$(($local_i+1))
	done

	echo "$start_string$dot_string$end_string"
	if [ -n "$extra_string" ]; then
		echo -e " -- $extra_string"
	fi
}
# Read the attributes of file $1 with stat(1) into the globals
# rights (octal mode with a leading 0), uid, gid, size and type
# ("-f" for regular files, "-d" for directories, "-bad" otherwise).
get_file_attrs()
{
	rights=0$(stat -c %a $1)
	uid=$(stat -c %u $1)
	gid=$(stat -c %g $1)
	size=$(stat -c %s $1)

	case "$(stat -c %F $1)" in
		"regular file"|"regular empty file")
			type="-f"
			;;
		"directory")
			type="-d"
			;;
		*)
			type="-bad"
			;;
	esac
}
# Remove file $1 from the mounted filesystem (and from the reference tree)
# and, when it is really gone, credit its inode and blocks back to the
# expected free counters before re-checking statfs.
#
# $1 file
do_unlink()
{
	# remember the size before the file disappears
	get_file_attrs /tmp/pitix.mnt/$1

	rm -f /tmp/pitix.rw/$1
	check_true "check unlink 1" rm /tmp/pitix.mnt/$1

	if check_true "check unlink 2" ! test -e /tmp/pitix.mnt/$1; then
		current_file_blocks=$((($size+$block_size-1)/$block_size))
		# more blocks than the direct slots: count one extra block
		if [ $current_file_blocks -gt $inode_direct_data_blocks ]; then
			current_file_blocks=$(($current_file_blocks+1))
		fi
		fblocks=$(($fblocks+$current_file_blocks))
		finodes=$(($finodes+1))
	fi

	check_statfs
}
# Overwrite file $1 with the 6-byte string "dummy\n" in both the reference
# tree and the mounted filesystem, update the expected free block count and
# verify the file and statfs afterwards.
#
# $1 file
do_truncate()
{
	get_file_attrs /tmp/pitix.mnt/$1

	# blocks held before truncation (one extra past the direct slots)
	old_blocks=$((($size+$block_size-1)/$block_size))
	[ $old_blocks -gt $inode_direct_data_blocks ] && old_blocks=$(($old_blocks+1))

	echo dummy > /tmp/pitix.rw/$1
	if check_true "check truncate" echo dummy \> /tmp/pitix.mnt/$1; then
		# old blocks are released, one block holds the new content
		fblocks=$(($fblocks+$old_blocks-1))
	fi

	check_file $1 6 $rights $uid $gid -f /tmp/pitix.rw/
	check_statfs
}
# Read-only test: mount the pre-built image pitix.loop.gz and compare every
# regular file on it against the reference files from pitix.files.tar.gz.
test_ro()
{
	rm -Rf /tmp/pitix.loop /tmp/pitix.mnt /tmp/pitix.ro > /dev/null 2>&1
	mkdir /tmp/pitix.ro
	mkdir /tmp/pitix.mnt

	cp pitix.loop.gz /tmp/pitix.loop.gz
	gunzip /tmp/pitix.loop.gz
	tar xzf pitix.files.tar.gz -C /tmp/pitix.ro

	# nothing else can run without a mounted filesystem
	check_true "mounting fs" mount -t pitix /tmp/pitix.loop /tmp/pitix.mnt/ -o loop || exit 1

	# cut strips the leading "/tmp/pitix.ro/" from each path
	for file in $(find /tmp/pitix.ro -type f | cut -f4- -d/); do
		get_file_attrs /tmp/pitix.ro/$file
		check_file $file $size $rights $uid $gid $type /tmp/pitix.ro/
	done

	# the module must refuse to unload while the filesystem is mounted
	check_false "module unloading" rmmod pitix
	check_true "umounting fs" umount /tmp/pitix.mnt/
}
b/tools/labs/templates/assignments/5-pitix/checker/_test/mkfs.pitix.c new file mode 100644 index 00000000000000..75f9e3260e76ac --- /dev/null +++ b/tools/labs/templates/assignments/5-pitix/checker/_test/mkfs.pitix.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +#include "pitix.h" + +/* + * mkfs.pitix block_size file + */ + +int main(int argc, char **argv) +{ + FILE *file; + char buffer[4096]; + int block_size, bits, i; + struct pitix_super_block psb; + struct pitix_inode root_inode; + + if (argc != 3) + return -1; + + block_size = atoi(argv[1]); + + switch (block_size) { + case 512: + bits = 9; + break; + case 1024: + bits = 10; + break; + case 2048: + bits = 11; + break; + case 4096: + bits = 12; + break; + default: + return -1; + } + + file = fopen(argv[2], "w+"); + if (!file) + return -1; + + memset(&psb, 0, sizeof(struct pitix_super_block)); + + psb.magic = PITIX_MAGIC; + psb.version = PITIX_VERSION; + psb.block_size_bits = bits; + psb.imap_block = PITIX_SUPERBLOCK_SIZE / block_size; + psb.dmap_block = psb.imap_block + 1; + psb.izone_block = psb.dmap_block + 1; + psb.dzone_block = psb.izone_block + IZONE_BLOCKS; + psb.bfree = 8 * block_size; + psb.ffree = IZONE_BLOCKS * block_size / inode_size(); + + printf("mkfs.pitix block_size=%d\n", block_size); + + /* zero disk */ + memset(buffer, 0, block_size); + for (i = 0; i < psb.bfree + IZONE_BLOCKS + 1 + 1 + psb.imap_block; i++) + fwrite(buffer, block_size, 1, file); + + fseek(file, 0, SEEK_SET); + + /* alloc the 1st block and inode to the roor dir */ + psb.bfree--; psb.ffree--; + /* initialize super block */ + fwrite(&psb, sizeof(psb), 1, file); + + fseek(file, PITIX_SUPERBLOCK_SIZE, SEEK_SET); + memset(buffer, 0, block_size); + buffer[0] = 0x01; + /* alloc inode 0 */ + fwrite(buffer, block_size, 1, file); + /* alloc block 0 */ + fwrite(buffer, block_size, 1, file); + + /* initialize root inode */ + memset(&root_inode, 0, sizeof(root_inode)); + root_inode.mode = S_IFDIR; + root_inode.size = 
block_size; + fseek(file, psb.izone_block * block_size, SEEK_SET); + fwrite(&root_inode, sizeof(root_inode), 1, file); + + return 0; +} diff --git a/tools/labs/templates/assignments/5-pitix/checker/_test/pitix.h b/tools/labs/templates/assignments/5-pitix/checker/_test/pitix.h new file mode 100644 index 00000000000000..2cca5839aac3a1 --- /dev/null +++ b/tools/labs/templates/assignments/5-pitix/checker/_test/pitix.h @@ -0,0 +1,138 @@ +#ifndef PITIX_H_ +#define PITIX_H_ + +#include + +#define PITIX_MAGIC 0x58495450 /* ascii little endian for PTIX */ +#define PITIX_VERSION 2 +#define PITIX_SUPERBLOCK_SIZE 4096 +#define PITIX_NAME_LEN 16 +#define IZONE_BLOCKS 32 +#define INODE_DIRECT_DATA_BLOCKS 5 + +/* + * filesystem layout: + * + * SB IMAP DMAP IZONE DATA + * ^ ^ (1 block) (1 block) (32 blocks) + * | | + * +-0 +-- 4096 + */ + +/* PITIX super block on disk + * could be reused for in-memory super block + */ +struct pitix_super_block { + unsigned long magic; + __u8 version; + __u8 block_size_bits; + __u8 imap_block; + __u8 dmap_block; + __u8 izone_block; + __u8 dzone_block; + __u16 bfree; + __u16 ffree; +#ifdef __KERNEL__ + struct buffer_head *sb_bh, *dmap_bh, *imap_bh; + __u8 *dmap, *imap; +#endif +}; + +/* PITIX dir entry on disk */ +struct __attribute__((__packed__)) pitix_dir_entry { + __u32 ino; + char name[PITIX_NAME_LEN]; +}; + +/* PITIX inode on disk */ +struct __attribute__((__packed__)) pitix_inode { + __u32 mode; + uid_t uid; + gid_t gid; + __u32 size; + __u32 time; + __u16 direct_data_blocks[INODE_DIRECT_DATA_BLOCKS]; + __u16 indirect_data_block; +}; + +/* returns size of PITIX inode on disk */ +static inline int inode_size(void) +{ + return sizeof(struct pitix_inode); +} + +/* returns size of PITIX dir entry on disk */ +static inline int dir_entry_size(void) +{ + return sizeof(struct pitix_dir_entry); +} + +#ifdef __KERNEL__ + +/* returns number of PITIX inodes on disk */ +static inline long get_inodes(struct super_block *sb) +{ + return IZONE_BLOCKS * 
sb->s_blocksize / inode_size(); +} + +/* returns number of PITIX dir entries per block */ +static inline int dir_entries_per_block(struct super_block *sb) +{ + return sb->s_blocksize / dir_entry_size(); +} + +/* returns number of data blocks on disk */ +static inline long get_blocks(struct super_block *sb) +{ + return 8 * sb->s_blocksize; +} + +/* file system ops */ +extern struct file_system_type pitix_fs_type; +extern struct dentry *pitix_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data); + +/* super block ops */ +extern const struct super_operations pitix_sops; +extern struct inode *pitix_alloc_inode(struct super_block *sb); +extern void pitix_destroy_inode(struct inode *inode); +extern int pitix_write_inode(struct inode *inode, + struct writeback_control *wbc); +extern void pitix_evict_inode(struct inode *inode); +extern int pitix_statfs(struct dentry *dentry, struct kstatfs *buf); +extern void pitix_put_super(struct super_block *sb); + +/* file ops */ +extern const struct file_operations pitix_file_operations; + +/* file inode ops */ +extern const struct inode_operations pitix_file_inode_operations; +extern int pitix_setattr(struct dentry *dentry, struct iattr *attr); + +/* dir ops */ +extern const struct file_operations pitix_dir_operations; +extern int pitix_readdir(struct file *filp, struct dir_context *ctx); + +/* dir inode ops */ +extern const struct inode_operations pitix_dir_inode_operations; +extern struct dentry *pitix_lookup(struct inode *dir, + struct dentry *dentry, unsigned int flags); +extern int pitix_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl); +extern int pitix_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); +extern int pitix_unlink(struct inode *dir, struct dentry *dentry); +extern int pitix_rmdir(struct inode *dir, struct dentry *dentry); + +/* addr space ops */ +extern const struct address_space_operations pitix_aops; +extern int pitix_readpage(struct file 
#!/bin/sh
#
# Build pitix.loop.gz: format an image with ./mkfs.pitix, mount it with the
# pitix module and copy the contents of pitix.files.tar.gz onto it.
#
# $1 - block size passed to mkfs.pitix

set -e
set -x

if [ $# -ne 1 ]; then
	echo "usage: $0 <block_size>"
	exit 1
fi

block_size=$1

mkdir -p /tmp/pitix.ro
mkdir -p /tmp/pitix.mnt

tar -xzf pitix.files.tar.gz -C /tmp/pitix.ro
./mkfs.pitix $block_size /tmp/pitix.loop

insmod pitix.ko
mount -t pitix /tmp/pitix.loop /tmp/pitix.mnt -o loop

cp -pr /tmp/pitix.ro/* /tmp/pitix.mnt/
ls -lR /tmp/pitix.mnt

umount /tmp/pitix.mnt
rmmod pitix

gzip /tmp/pitix.loop
mv /tmp/pitix.loop.gz .

rm -rf /tmp/pitix.ro
rm -rf /tmp/pitix.mnt
#ifdef __KERNEL__
/*
 * These accessors dereference struct super_block / struct inode, so they
 * are kept under the __KERNEL__ guard.
 */

/* Return the PITIX super block stored in sb->s_fs_info. */
static inline struct pitix_super_block *pitix_sb(struct super_block *sb)
{
	return sb->s_fs_info;
}

/* Return the PITIX inode stored in inode->i_private. */
static inline struct pitix_inode *pitix_i(struct inode *inode)
{
	return inode->i_private;
}
#endif /* __KERNEL__ */
#!/bin/sh
#
# e100-ix checker: loads the module, brings the interface up and runs
# ping / nttcp against the remote address. Stops at the first failing
# command (set -e).

DEV=ixeth0
ADDR=172.30.0.2
REM_ADDR=172.30.0.1
NTTCP=./nttcp
MODULE=./e100-ix.ko

set -e

# Uncomment this to show what commands are run.
#set -x

# Clear the kernel log. "&>" is not POSIX sh (it would background the
# command), so use the portable redirection form.
dmesg -c > /dev/null 2>&1

echo
echo "== 01. insmod, rmmod =="
insmod $MODULE
dmesg -c
rmmod $MODULE
dmesg -c

echo
echo "== 02. link up, set address =="
insmod $MODULE
dmesg -c
ip link set up dev $DEV
ip address add $ADDR/24 dev $DEV

echo
echo "== 03. ping =="
ping -c1 $REM_ADDR

echo
echo "== 04. nttcp transmit =="
$NTTCP -T -t -c $REM_ADDR

echo
echo "== 05. nttcp receive =="
$NTTCP -T -r -c $REM_ADDR

echo
echo "== 06. rmmod, reinsert =="
dmesg -c
rmmod $MODULE
dmesg -c
insmod $MODULE
dmesg -c

echo
echo "== 07. one last test =="
ip link set up dev $DEV
ip address add $ADDR/24 dev $DEV
ping -c1 $REM_ADDR
$NTTCP -T -t -c $REM_ADDR
rmmod $MODULE

echo
echo "Success!"
/*
 * e100_ndo_stop - .ndo_stop callback of e100_netdev_ops
 *
 * Skeleton: fetches the driver private data and returns 0; the TODO items
 * below are still to be implemented.
 */
static int e100_ndo_stop(struct net_device *netdev)
{
	struct e100_priv_data *data;

	data = netdev_priv(netdev);

	/* TODO 5: stop transmit by calling netif_stop_queue */

	/* TODO 6: disable network interrupts and free irq */

	/* TODO 5: deallocate TX ring */
	/* TODO 6: deallocate RX ring */

	return 0;
}
/*
 * e100_init - module entry point (registered with module_init).
 *
 * Returns 0 as a placeholder so the skeleton has a defined return value,
 * like the other stubs in this file; replace it with the result of the
 * PCI driver registration when implementing TODO 1.
 */
static int e100_init(void)
{
	/* TODO 1: register PCI driver */

	return 0;
}
/*
 * struct csr_stat_ack - eight one-bit flags declared on u8.
 * NOTE(review): presumably mirrors the bits of scb.stat_ack in struct csr;
 * confirm the bit order and meanings against the device manual.
 */
struct csr_stat_ack {
	u8 fcp:1;
	u8 res:1;
	u8 swi:1;
	u8 mdi:1;
	u8 rnr:1;
	u8 cna:1;
	u8 frame_reception:1;
	u8 cx:1;
};
-ffreestanding -fno-pic -c -o $@ $^ + +guest64.img: guest64.o + $(LD) -T guest.ld $^ -o $@ + +%.img.o: %.img + $(LD) -b binary -r $^ -o $@ + +.PHONY: clean +clean: + $(RM) kvm-hello-world kvm-hello-world.o payload.o \ + *.o *.img \ No newline at end of file diff --git a/tools/labs/templates/assignments/7-kvm-vmm/skel/guest_16_bits/guest.ld b/tools/labs/templates/assignments/7-kvm-vmm/skel/guest_16_bits/guest.ld new file mode 100644 index 00000000000000..70e537e4ab03be --- /dev/null +++ b/tools/labs/templates/assignments/7-kvm-vmm/skel/guest_16_bits/guest.ld @@ -0,0 +1,8 @@ +OUTPUT_FORMAT(binary) +SECTIONS +{ + .start : { *(.start) } + .text : { *(.text*) } + .rodata : { *(.rodata) } + .data : { *(.data) } +} \ No newline at end of file diff --git a/tools/labs/templates/assignments/7-kvm-vmm/skel/guest_16_bits/guest16.s b/tools/labs/templates/assignments/7-kvm-vmm/skel/guest_16_bits/guest16.s new file mode 100644 index 00000000000000..2b93a10460d659 --- /dev/null +++ b/tools/labs/templates/assignments/7-kvm-vmm/skel/guest_16_bits/guest16.s @@ -0,0 +1,7 @@ + .code16 + .global code16, code16_end +guest16: + movw $42, %ax + movw %ax, 0x400 + hlt +guest16_end: \ No newline at end of file diff --git a/tools/labs/templates/assignments/7-kvm-vmm/skel/guest_32_bits/guest_code.c b/tools/labs/templates/assignments/7-kvm-vmm/skel/guest_32_bits/guest_code.c new file mode 100644 index 00000000000000..a60edde8eed15f --- /dev/null +++ b/tools/labs/templates/assignments/7-kvm-vmm/skel/guest_32_bits/guest_code.c @@ -0,0 +1,60 @@ +#include "device.h" + +// Phys addr memory layout: +// Code: [0x1000, sizeof(code)) +// Heap: (Guest Memory): [0x010_000, xxx) +// Stack: [xxx, 0x100_000) - grows down from 0x100_000 +// MMIO Devices: [0x100_000, 0x101_000) +static const uint64_t heap_phys_addr = 0x010000; +static const uint64_t dev_mmio_start = 0x100000; +simqueue_t g2h_queue; + +void create_q(uint64_t data_offset, int size, queue_control_t *qc) +{ + g2h_queue.maxlen = size; + 
g2h_queue.q_ctrl = qc; + g2h_queue.buffer = (void*)data_offset; + + //const char *p; + //for (p = "Ana are mere!\n"; *p; ++p) + // circ_bbuf_push(&g2h_queue, *p); +} + +/* Helper functions */ +static inline uint32_t inl(uint16_t port) +{ + uint32_t v; + asm volatile("inl %1,%0" : "=a" (v) : "dN" (port)); + return v; +} + +static inline void outl(uint16_t port, uint32_t v) +{ + asm volatile("outl %0,%1" : : "a" (v), "dN" (port)); +} + +static void outb(uint16_t port, uint8_t value) { + asm("outb %0,%1" : /* empty */ : "a" (value), "Nd" (port) : "memory"); +} + +static inline uint8_t inb(uint16_t port) { + uint8_t ret; + + asm volatile("inb %1, %0": "=a"(ret): "Nd"(port) ); + return ret; +} + +void +__attribute__((noreturn)) +__attribute__((section(".start"))) +_start(void) { + + for (p = "Hello, world!\n"; *p; ++p) + outb(0xE9, *p); + + *(long *) 0x400 = 42; + + /* TODO: Using the paravirtualized driver we have written for SIMVIRTIO, send + "Ana are mere!\n" */ + +} diff --git a/tools/labs/templates/assignments/7-kvm-vmm/skel/include/device.h b/tools/labs/templates/assignments/7-kvm-vmm/skel/include/device.h new file mode 100644 index 00000000000000..bb2875c3ce4195 --- /dev/null +++ b/tools/labs/templates/assignments/7-kvm-vmm/skel/include/device.h @@ -0,0 +1,28 @@ +#ifndef DEVICE_H_ +#define DEVICE_H_ + +#include "queue.h" + +#define MAGIC_VALUE 0x74726976 + +#define DEVICE_RESET 0x0 +#define DEVICE_CONFIG 0x2 +#define DEVICE_READY 0x4 + +#define DRIVER_ACK 0x0 +#define DRIVER 0x2 +#define DRIVER_OK 0x4 +#define DRIVER_RESET 0x8000 + +typedef struct device { + uint32_t magic; + uint8_t device_status; + uint8_t driver_status; + uint8_t max_queue_len; +} device_t; + +typedef struct device_table { + uint16_t count; + uint64_t device_addresses[10]; + } device_table_t; +#endif \ No newline at end of file diff --git a/tools/labs/templates/assignments/7-kvm-vmm/skel/include/queue.h b/tools/labs/templates/assignments/7-kvm-vmm/skel/include/queue.h new file mode 100644 
#ifndef QUEUE_H
#define QUEUE_H
/*
 * Circular buffer queue.
 *
 * One slot is always left unused so that "head == tail" means empty and
 * "head + 1 == tail" (modulo maxlen) means full; a queue with maxlen slots
 * therefore holds at most maxlen - 1 elements.
 */

#include <stdint.h>

// Queue elements type.
typedef uint8_t q_elem_t;

typedef struct queue_control {
	// Index in 'buffer' of the next slot the producer will write.
	unsigned head;
	// Index in 'buffer' of the next slot the consumer will read.
	unsigned tail;
} queue_control_t;

typedef struct simqueue {
	// MMIO queue control.
	volatile queue_control_t *q_ctrl;
	// Size of the queue buffer/data, in elements.
	unsigned maxlen;
	// Queue data buffer.
	q_elem_t *buffer;
} simqueue_t;

/*
 * circ_bbuf_push - append one element to the queue.
 *
 * Returns 0 on success, -1 if the queue is full or the head index found in
 * the control block does not lie inside the buffer.
 *
 * Defined static inline because this header is included from more than one
 * translation unit; an external definition here would be emitted in each.
 */
static inline int circ_bbuf_push(simqueue_t *q, q_elem_t data)
{
	unsigned next, head;

	head = q->q_ctrl->head;		// do a single mmio read and cache the value.

	// The control block is shared memory: never index with an
	// out-of-range value read from it.
	if (head >= q->maxlen)
		return -1;

	next = head + 1;		// next is where head will point to after this write.
	if (next >= q->maxlen)
		next = 0;

	if (next == q->q_ctrl->tail)	// if the head + 1 == tail, circular buffer is full
		return -1;

	q->buffer[head] = data;		// Load data and then move
	q->q_ctrl->head = next;		// head to next data offset.
	return 0;			// return success to indicate successful push.
}

/*
 * circ_bbuf_pop - remove the oldest element from the queue.
 *
 * On success stores the element in *data and returns 0. Returns -1 if the
 * queue is empty or the tail index found in the control block does not lie
 * inside the buffer; *data is left untouched in that case.
 */
static inline int circ_bbuf_pop(simqueue_t *q, q_elem_t *data)
{
	unsigned next, tail;

	tail = q->q_ctrl->tail;		// do a single mmio read and cache the value.
	if (q->q_ctrl->head == tail)	// if the head == tail, we don't have any data
		return -1;

	if (tail >= q->maxlen)		// same shared-memory sanity check as in push
		return -1;

	next = tail + 1;		// next is where tail will point to after this read.
	if (next >= q->maxlen)
		next = 0;

	*data = q->buffer[tail];	// Read data and then move
	q->q_ctrl->tail = next;		// tail to next offset.
	return 0;			// return success to indicate successful pop.
}

#endif //QUEUE_H
+} + +#endif //QUEUE_H \ No newline at end of file diff --git a/tools/labs/templates/assignments/7-kvm-vmm/skel/include/vcpu.h b/tools/labs/templates/assignments/7-kvm-vmm/skel/include/vcpu.h new file mode 100644 index 00000000000000..c16ec044d07719 --- /dev/null +++ b/tools/labs/templates/assignments/7-kvm-vmm/skel/include/vcpu.h @@ -0,0 +1,64 @@ + +#ifndef _VCPU_H_ +#define _VCPU_H_ + +#include "vm.h" + +#define CR0_PE 1u +#define CR0_MP (1U << 1) +#define CR0_EM (1U << 2) +#define CR0_TS (1U << 3) +#define CR0_ET (1U << 4) +#define CR0_NE (1U << 5) +#define CR0_WP (1U << 16) +#define CR0_AM (1U << 18) +#define CR0_NW (1U << 29) +#define CR0_CD (1U << 30) +#define CR0_PG (1U << 31) + +/* CR4 bits */ +#define CR4_VME 1 +#define CR4_PVI (1U << 1) +#define CR4_TSD (1U << 2) +#define CR4_DE (1U << 3) +#define CR4_PSE (1U << 4) +#define CR4_PAE (1U << 5) +#define CR4_MCE (1U << 6) +#define CR4_PGE (1U << 7) +#define CR4_PCE (1U << 8) +#define CR4_OSFXSR (1U << 8) +#define CR4_OSXMMEXCPT (1U << 10) +#define CR4_UMIP (1U << 11) +#define CR4_VMXE (1U << 13) +#define CR4_SMXE (1U << 14) +#define CR4_FSGSBASE (1U << 16) +#define CR4_PCIDE (1U << 17) +#define CR4_OSXSAVE (1U << 18) +#define CR4_SMEP (1U << 20) +#define CR4_SMAP (1U << 21) + +#define EFER_SCE 1 +#define EFER_LME (1U << 8) +#define EFER_LMA (1U << 10) +#define EFER_NXE (1U << 11) + +/* 32-bit page directory entry bits */ +#define PDE32_PRESENT 1 +#define PDE32_RW (1U << 1) +#define PDE32_USER (1U << 2) +#define PDE32_PS (1U << 7) + +/* 64-bit page * entry bits */ +#define PDE64_PRESENT 1 +#define PDE64_RW (1U << 1) +#define PDE64_USER (1U << 2) +#define PDE64_ACCESSED (1U << 5) +#define PDE64_DIRTY (1U << 6) +#define PDE64_PS (1U << 7) +#define PDE64_G (1U << 8) + + +typedef struct vcpu { + int fd; + struct kvm_run *kvm_run; +} virtual_cpu; \ No newline at end of file diff --git a/tools/labs/templates/assignments/7-kvm-vmm/skel/include/vm.h b/tools/labs/templates/assignments/7-kvm-vmm/skel/include/vm.h new 
file mode 100644 index 00000000000000..5af83c85e5ef09 --- /dev/null +++ b/tools/labs/templates/assignments/7-kvm-vmm/skel/include/vm.h @@ -0,0 +1,9 @@ +#ifndef _VM_H_ +#define _VM_H_ + + +typedef struct vm { + int sys_fd; + int fd; + char *mem; +} virtual_machine; \ No newline at end of file diff --git a/tools/labs/templates/assignments/7-kvm-vmm/skel/payload.ld b/tools/labs/templates/assignments/7-kvm-vmm/skel/payload.ld new file mode 100644 index 00000000000000..7476a14372cfe8 --- /dev/null +++ b/tools/labs/templates/assignments/7-kvm-vmm/skel/payload.ld @@ -0,0 +1,13 @@ +SECTIONS +{ + .payload16 0 : { + guest16 = .; + guest16.o(.text) + guest16_end = .; + } + .payload64 0 : AT(LOADADDR(.payload16)+SIZEOF(.payload16)) { + guest64 = .; + guest64.img.o + guest64_end = .; + } +} \ No newline at end of file diff --git a/tools/labs/templates/assignments/7-kvm-vmm/skel/vmm.c b/tools/labs/templates/assignments/7-kvm-vmm/skel/vmm.c new file mode 100644 index 00000000000000..77caf419687514 --- /dev/null +++ b/tools/labs/templates/assignments/7-kvm-vmm/skel/vmm.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vm.h" +#include "vcpu.h" + +#define UNUSED_PARAMETER(P) ((void)(P)) + +int main(int argc, char **argv) { + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(argv); + struct vm virtual_machine; + struct vcpu virtual_cpu; + + /* TODO: Initialize the VM. We will use 0x100000 bytes for the memory */ + /* TODO: Initialize the VCPU */ + /* TODO: Setup real mode. We will use guest_16_bits to test this. + /* TODO: IF real mode works all right. We can try to set up long mode*/ + + for (;;) { + /* TODO: Run the VCPU with KVM_RUN */ + + /* TODO: Handle VMEXITs */ + switch (vcpu->kvm_run->exit_reason) { + case KVM_EXIT_HLT: {goto check;} + case KVM_EXIT_MMIO: { + /* TODO: Handle MMIO read/write. 
Data is available in the shared memory at + vcpu->kvm_run */ + } + case KVM_EXIT_IO: { + /* TODO: Handle IO ports write (e.g. outb). Data is available in the shared memory + at vcpu->kvm_run. The data is at vcpu->kvm_run + vcpu->kvm_run->io.data_offset; */ + } + } + + fprintf(stderr, "\nGot exit_reason %d," + " expected KVM_EXIT_HLT (%d)\n", + vcpu->kvm_run->exit_reason, KVM_EXIT_HLT); + exit(1); + } + + /* We verify that the guest code ran accordingly */ + check: + if (ioctl(vcpu->fd, KVM_GET_REGS, ®s) < 0) { + perror("KVM_GET_REGS"); + exit(1); + } + + /* Verify that the guest has written 42 to RAX |*/ + if (regs.rax != 42) { + printf("Wrong result: {E,R,}AX is %lld\n", regs.rax); + return 0; + } + + /* Verify that the guest has written 42 at 0x400 */ + memcpy(&memval, &vm->mem[0x400], sz); + if (memval != 42) { + printf("Wrong result: memory at 0x400 is %lld\n", + (unsigned long long)memval); + return 0; + } + + printf("%s\n", "Finished vmm"); + return 0; +} \ No newline at end of file diff --git a/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/Kbuild b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/Kbuild new file mode 100644 index 00000000000000..4f1b616bf92028 --- /dev/null +++ b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g -Wno-unused + +obj-m = ram-disk.o diff --git a/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/ram-disk.c b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/ram-disk.c new file mode 100644 index 00000000000000..ce8b17510c569c --- /dev/null +++ b/tools/labs/templates/block_device_drivers/1-2-3-6-ram-disk/kernel/ram-disk.c @@ -0,0 +1,261 @@ +/* + * SO2 - Block device drivers lab (#7) + * Linux - Exercise #1, #2, #3, #6 (RAM Disk) + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple RAM Disk"); 
+MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + + +#define KERN_LOG_LEVEL KERN_ALERT + +#define MY_BLOCK_MAJOR 240 +#define MY_BLKDEV_NAME "mybdev" +#define MY_BLOCK_MINORS 1 +#define NR_SECTORS 128 + +#define KERNEL_SECTOR_SIZE 512 + +/* TODO 6/0: use bios for read/write requests */ +#define USE_BIO_TRANSFER 0 + + +static struct my_block_dev { + struct blk_mq_tag_set tag_set; + struct request_queue *queue; + struct gendisk *gd; + u8 *data; + size_t size; +} g_dev; + +static int my_block_open(struct block_device *bdev, fmode_t mode) +{ + return 0; +} + +static void my_block_release(struct gendisk *gd, fmode_t mode) +{ +} + +static const struct block_device_operations my_block_ops = { + .owner = THIS_MODULE, + .open = my_block_open, + .release = my_block_release +}; + +static void my_block_transfer(struct my_block_dev *dev, sector_t sector, + unsigned long len, char *buffer, int dir) +{ + unsigned long offset = sector * KERNEL_SECTOR_SIZE; + + /* check for read/write beyond end of block device */ + if ((offset + len) > dev->size) + return; + + /* TODO 3/4: read/write to dev buffer depending on dir */ + if (dir == 1) /* write */ + memcpy(dev->data + offset, buffer, len); + else + memcpy(buffer, dev->data + offset, len); +} + +/* to transfer data using bio structures enable USE_BIO_TRANFER */ +#if USE_BIO_TRANSFER == 1 +static void my_xfer_request(struct my_block_dev *dev, struct request *req) +{ + /* TODO 6/10: iterate segments */ + struct bio_vec bvec; + struct req_iterator iter; + + rq_for_each_segment(bvec, req, iter) { + sector_t sector = iter.iter.bi_sector; + unsigned long offset = bvec.bv_offset; + size_t len = bvec.bv_len; + int dir = bio_data_dir(iter.bio); + char *buffer = kmap_atomic(bvec.bv_page); + printk(KERN_LOG_LEVEL "%s: buf %8p offset %lu len %u dir %d\n", __func__, buffer, offset, len, dir); + + /* TODO 6/3: copy bio data to device buffer */ + my_block_transfer(dev, sector, len, buffer + offset, dir); + kunmap_atomic(buffer); + } +} +#endif + 
+static blk_status_t my_block_request(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *rq; + struct my_block_dev *dev = hctx->queue->queuedata; + + /* TODO 2: get pointer to request */ + rq = bd->rq; + + /* TODO 2: start request processing. */ + blk_mq_start_request(rq); + + /* TODO 2/5: check fs request. Return if passthrough. */ + if (blk_rq_is_passthrough(rq)) { + printk(KERN_NOTICE "Skip non-fs request\n"); + blk_mq_end_request(rq, BLK_STS_IOERR); + goto out; + } + + /* TODO 2/6: print request information */ + printk(KERN_LOG_LEVEL + "request received: pos=%llu bytes=%u " + "cur_bytes=%u dir=%c\n", + (unsigned long long) blk_rq_pos(rq), + blk_rq_bytes(rq), blk_rq_cur_bytes(rq), + rq_data_dir(rq) ? 'W' : 'R'); + +#if USE_BIO_TRANSFER == 1 + /* TODO 6/1: process the request by calling my_xfer_request */ + my_xfer_request(dev, rq); +#else + /* TODO 3/3: process the request by calling my_block_transfer */ + my_block_transfer(dev, blk_rq_pos(rq), + blk_rq_bytes(rq), + bio_data(rq->bio), rq_data_dir(rq)); +#endif + + /* TODO 2/1: end request successfully */ + blk_mq_end_request(rq, BLK_STS_OK); + +out: + return BLK_STS_OK; +} + +static struct blk_mq_ops my_queue_ops = { + .queue_rq = my_block_request, +}; + +static int create_block_device(struct my_block_dev *dev) +{ + int err; + + dev->size = NR_SECTORS * KERNEL_SECTOR_SIZE; + dev->data = vmalloc(dev->size); + if (dev->data == NULL) { + printk(KERN_ERR "vmalloc: out of memory\n"); + err = -ENOMEM; + goto out_vmalloc; + } + + /* Initialize tag set. */ + dev->tag_set.ops = &my_queue_ops; + dev->tag_set.nr_hw_queues = 1; + dev->tag_set.queue_depth = 128; + dev->tag_set.numa_node = NUMA_NO_NODE; + dev->tag_set.cmd_size = 0; + dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + err = blk_mq_alloc_tag_set(&dev->tag_set); + if (err) { + printk(KERN_ERR "blk_mq_alloc_tag_set: can't allocate tag set\n"); + goto out_alloc_tag_set; + } + + /* Allocate queue. 
*/ + dev->queue = blk_mq_init_queue(&dev->tag_set); + if (IS_ERR(dev->queue)) { + printk(KERN_ERR "blk_mq_init_queue: out of memory\n"); + err = -ENOMEM; + goto out_blk_init; + } + blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE); + dev->queue->queuedata = dev; + + /* initialize the gendisk structure */ + dev->gd = alloc_disk(MY_BLOCK_MINORS); + if (!dev->gd) { + printk(KERN_ERR "alloc_disk: failure\n"); + err = -ENOMEM; + goto out_alloc_disk; + } + + dev->gd->major = MY_BLOCK_MAJOR; + dev->gd->first_minor = 0; + dev->gd->fops = &my_block_ops; + dev->gd->queue = dev->queue; + dev->gd->private_data = dev; + snprintf(dev->gd->disk_name, DISK_NAME_LEN, "myblock"); + set_capacity(dev->gd, NR_SECTORS); + + add_disk(dev->gd); + + return 0; + +out_alloc_disk: + blk_cleanup_queue(dev->queue); +out_blk_init: + blk_mq_free_tag_set(&dev->tag_set); +out_alloc_tag_set: + vfree(dev->data); +out_vmalloc: + return err; +} + +static int __init my_block_init(void) +{ + int err = 0; + + /* TODO 1/5: register block device */ + err = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME); + if (err < 0) { + printk(KERN_ERR "register_blkdev: unable to register\n"); + return err; + } + + /* TODO 2/3: create block device using create_block_device */ + err = create_block_device(&g_dev); + if (err < 0) + goto out; + + return 0; + +out: + /* TODO 2/1: unregister block device in case of an error */ + unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME); + return err; +} + +static void delete_block_device(struct my_block_dev *dev) +{ + if (dev->gd) { + del_gendisk(dev->gd); + put_disk(dev->gd); + } + + if (dev->queue) + blk_cleanup_queue(dev->queue); + if (dev->tag_set.tags) + blk_mq_free_tag_set(&dev->tag_set); + if (dev->data) + vfree(dev->data); +} + +static void __exit my_block_exit(void) +{ + /* TODO 2/1: cleanup block device using delete_block_device */ + delete_block_device(&g_dev); + + /* TODO 1/1: unregister block device */ + unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME); +} + 
#define NR_SECTORS	128
#define SECTOR_SIZE	512

#define DEVICE_NAME	"/dev/myblock"
#define MODULE_NAME	"ram-disk"
#define MY_BLOCK_MAJOR	"240"
#define MY_BLOCK_MINOR	"0"


/* Number of distinct values an object of elem's type can hold. */
#define max_elem_value(elem)	\
	(1 << (8 * sizeof(elem)))

static unsigned char buffer[SECTOR_SIZE];
static unsigned char buffer_copy[SECTOR_SIZE];

/*
 * test_sector - write random data to one sector and read it back.
 * @fd:     open descriptor of the device under test
 * @sector: index of the sector to exercise
 *
 * Prints "passed" when the data read back matches what was written and
 * "failed" otherwise. Any seek, write, sync or read error (including a short
 * transfer) is reported on stderr and counts as a failure, so stale contents
 * of buffer_copy can never produce a false "passed".
 */
static void test_sector(int fd, size_t sector)
{
	const off_t pos = (off_t)sector * SECTOR_SIZE;
	int ok = 1;
	size_t i;

	for (i = 0; i < sizeof(buffer) / sizeof(buffer[0]); i++)
		buffer[i] = rand() % max_elem_value(buffer[0]);

	if (lseek(fd, pos, SEEK_SET) != pos) {
		perror("lseek");
		ok = 0;
	} else if (write(fd, buffer, sizeof(buffer)) != (ssize_t)sizeof(buffer)) {
		perror("write");
		ok = 0;
	} else if (fsync(fd) != 0) {
		perror("fsync");
		ok = 0;
	} else if (lseek(fd, pos, SEEK_SET) != pos) {
		perror("lseek");
		ok = 0;
	} else if (read(fd, buffer_copy, sizeof(buffer_copy)) !=
			(ssize_t)sizeof(buffer_copy)) {
		perror("read");
		ok = 0;
	}

	/* sector is a size_t, so it needs %zu rather than %d. */
	printf("test sector %3zu ... ", sector);
	if (ok && memcmp(buffer, buffer_copy, sizeof(buffer_copy)) == 0)
		printf("passed\n");
	else
		printf("failed\n");
}
PHYSICAL_DISK_NAME "/dev/vdb" +#define KERNEL_SECTOR_SIZE 512 + +#define BIO_WRITE_MESSAGE "def" + + +/* pointer to physical device structure */ +static struct block_device *phys_bdev; + +static void send_test_bio(struct block_device *bdev, int dir) +{ + struct bio *bio = bio_alloc(GFP_NOIO, 1); + struct page *page; + char *buf; + + /* TODO 4/3: fill bio (bdev, sector, direction) */ + bio->bi_disk = bdev->bd_disk; + bio->bi_iter.bi_sector = 0; + bio->bi_opf = dir; + + page = alloc_page(GFP_NOIO); + bio_add_page(bio, page, KERNEL_SECTOR_SIZE, 0); + + /* TODO 5/5: write message to bio buffer if direction is write */ + if (dir == REQ_OP_WRITE) { + buf = kmap_atomic(page); + memcpy(buf, BIO_WRITE_MESSAGE, strlen(BIO_WRITE_MESSAGE)); + kunmap_atomic(buf); + } + + /* TODO 4/3: submit bio and wait for completion */ + printk(KERN_LOG_LEVEL "[send_test_bio] Submiting bio\n"); + submit_bio_wait(bio); + printk(KERN_LOG_LEVEL "[send_test_bio] Done bio\n"); + + /* TODO 4/3: read data (first 3 bytes) from bio buffer and print it */ + buf = kmap_atomic(page); + printk(KERN_LOG_LEVEL "read %02x %02x %02x\n", buf[0], buf[1], buf[2]); + kunmap_atomic(buf); + + bio_put(bio); + __free_page(page); +} + +static struct block_device *open_disk(char *name) +{ + struct block_device *bdev; + + /* TODO 4/5: get block device in exclusive mode */ + bdev = blkdev_get_by_path(name, FMODE_READ | FMODE_WRITE | FMODE_EXCL, THIS_MODULE); + if (IS_ERR(bdev)) { + printk(KERN_ERR "blkdev_get_by_path\n"); + return NULL; + } + + return bdev; +} + +static int __init relay_init(void) +{ + phys_bdev = open_disk(PHYSICAL_DISK_NAME); + if (phys_bdev == NULL) { + printk(KERN_ERR "[relay_init] No such device\n"); + return -EINVAL; + } + + send_test_bio(phys_bdev, REQ_OP_READ); + + return 0; +} + +static void close_disk(struct block_device *bdev) +{ + /* TODO 4/1: put block device */ + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); +} + +static void __exit relay_exit(void) +{ + /* TODO 5/1: send test 
write bio */ + send_test_bio(phys_bdev, REQ_OP_WRITE); + + close_disk(phys_bdev); +} + +module_init(relay_init); +module_exit(relay_exit); diff --git a/tools/labs/templates/block_device_drivers/4-5-relay-disk/test-relay-disk b/tools/labs/templates/block_device_drivers/4-5-relay-disk/test-relay-disk new file mode 100755 index 00000000000000..f73aad004ea50c --- /dev/null +++ b/tools/labs/templates/block_device_drivers/4-5-relay-disk/test-relay-disk @@ -0,0 +1,20 @@ +#!/bin/sh + +PHYSICAL_DISK_NAME="/dev/vdb" +TMP_FILE="/tmp/disk_data" +echo "abc" > "$PHYSICAL_DISK_NAME" +if ! insmod relay-disk.ko +then + echo "insmod failed" + exit 1 +fi +if ! rmmod relay-disk +then + echo "rmmod failed" + exit 1 +fi +sleep 1 + +echo -n "read from $PHYSICAL_DISK_NAME: " +dd if=$PHYSICAL_DISK_NAME of=$TMP_FILE count=3 bs=1 &> /dev/null +cat $TMP_FILE | hexdump -v -e '/1 "%02X "'; echo diff --git a/tools/labs/templates/debugging/dumpstack/Kbuild b/tools/labs/templates/debugging/dumpstack/Kbuild new file mode 100644 index 00000000000000..6e5dcf0a1c349a --- /dev/null +++ b/tools/labs/templates/debugging/dumpstack/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = dumpstack.o diff --git a/tools/labs/templates/debugging/dumpstack/dumpstack.c b/tools/labs/templates/debugging/dumpstack/dumpstack.c new file mode 100644 index 00000000000000..d5a1c1a47dfeb7 --- /dev/null +++ b/tools/labs/templates/debugging/dumpstack/dumpstack.c @@ -0,0 +1,36 @@ +#include + +static noinline void foo3(void) +{ + pr_info("foo3()\n"); + dump_stack(); +} + +static noinline void foo2(void) +{ + pr_info("foo2()\n"); + foo3(); +} + +static noinline void foo1(void) +{ + pr_info("foo1()\n"); + foo2(); +} + +static int so2_dumpstack_init(void) +{ + pr_info("dumpstack_init\n"); + foo1(); + + return 0; +} + +static void so2_dumpstack_exit(void) +{ + pr_info("dumpstack exit\n"); +} + +MODULE_LICENSE("GPL v2"); +module_init(so2_dumpstack_init); 
+module_exit(so2_dumpstack_exit); diff --git a/tools/labs/templates/debugging/leak/Kbuild b/tools/labs/templates/debugging/leak/Kbuild new file mode 100644 index 00000000000000..bf0dc5216a8434 --- /dev/null +++ b/tools/labs/templates/debugging/leak/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = leak.o diff --git a/tools/labs/templates/debugging/leak/leak.c b/tools/labs/templates/debugging/leak/leak.c new file mode 100644 index 00000000000000..26ab404f255ff2 --- /dev/null +++ b/tools/labs/templates/debugging/leak/leak.c @@ -0,0 +1,23 @@ +#include +#include +#include +#include +#include +#include + +static int leak_init(void) +{ + pr_info("%s\n", __func__); + + (void)kmalloc(16, GFP_KERNEL); + + return 0; +} + +static void leak_exit(void) +{ +} + +MODULE_LICENSE("GPL v2"); +module_init(leak_init); +module_exit(leak_exit); diff --git a/tools/labs/templates/debugging/list/Kbuild b/tools/labs/templates/debugging/list/Kbuild new file mode 100644 index 00000000000000..7187139dbdb7af --- /dev/null +++ b/tools/labs/templates/debugging/list/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = list.o diff --git a/tools/labs/templates/debugging/list/list.c b/tools/labs/templates/debugging/list/list.c new file mode 100644 index 00000000000000..ce76e3510ef203 --- /dev/null +++ b/tools/labs/templates/debugging/list/list.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include +#include + +static char *op = "ubi"; + +module_param(op, charp, 0000); +MODULE_PARM_DESC(op, "List error type"); + +struct list_m { + int a; + struct list_head list; +}; + +LIST_HEAD(head); + +static noinline void use_before_init(void) +{ + struct list_m *m = kmalloc(sizeof(*m), GFP_KERNEL); + + pr_info("use before init\n"); + + list_del(&m->list); +} + +static noinline void use_after_free(void) +{ + struct list_m *m = kmalloc(sizeof(*m), GFP_KERNEL); + + pr_info("use after free\n"); 
+ + kfree(m); + list_del(&m->list); +} + +static noinline void crush(void) +{ + struct list_m e1, e2; + struct list_head *i; + + e1.a = 2; + e2.a = 3; + + list_add(&e1.list, &head); + list_add(&e2.list, &head); + + list_for_each(i, &head) { + struct list_m *x = list_entry(i, struct list_m, list); + + pr_info("list_for each %p\n", &x->a); + list_del(&x->list); + } +} + +static int so2_list_init(void) +{ + pr_info("list_init with op %s\n", op); + + /* use before init */ + if (strncmp(op, "ubi", 3) == 0) + use_before_init(); + if (strncmp(op, "uaf", 3) == 0) + use_after_free(); + if (strncmp(op, "crush", 5) == 0) + crush(); + + return 0; +} + +static void so2_list_exit(void) +{ +} + +MODULE_LICENSE("GPL v2"); +module_init(so2_list_init); +module_exit(so2_list_exit); diff --git a/tools/labs/templates/debugging/locking/Kbuild b/tools/labs/templates/debugging/locking/Kbuild new file mode 100644 index 00000000000000..4656b8c1ac6c62 --- /dev/null +++ b/tools/labs/templates/debugging/locking/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = locking.o diff --git a/tools/labs/templates/debugging/locking/locking.c b/tools/labs/templates/debugging/locking/locking.c new file mode 100644 index 00000000000000..12a11511e91fe0 --- /dev/null +++ b/tools/labs/templates/debugging/locking/locking.c @@ -0,0 +1,42 @@ +#include +#include + +static DEFINE_MUTEX(a); +static DEFINE_MUTEX(b); + +static noinline int thread_a(void *unused) +{ + mutex_lock(&a); pr_info("%s acquired A\n", __func__); + mutex_lock(&b); pr_info("%s acquired B\n", __func__); + + mutex_unlock(&b); + mutex_unlock(&a); + + return 0; +} + +static noinline int thread_b(void *unused) +{ + mutex_lock(&b); pr_info("%s acquired B\n", __func__); + mutex_lock(&a); pr_info("%s acquired A\n", __func__); + + mutex_unlock(&a); + mutex_unlock(&b); + + return 0; +} + + +int init_module(void) +{ + kthread_run(thread_a, NULL, "thread_a"); + kthread_run(thread_b, NULL, "thread_b"); 
+ + return 0; +} + +void exit_module(void) +{ +} + +MODULE_LICENSE("GPL v2"); diff --git a/tools/labs/templates/debugging/locking2/Kbuild b/tools/labs/templates/debugging/locking2/Kbuild new file mode 100644 index 00000000000000..639f09cc298826 --- /dev/null +++ b/tools/labs/templates/debugging/locking2/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = locking2.o diff --git a/tools/labs/templates/debugging/locking2/locking2.c b/tools/labs/templates/debugging/locking2/locking2.c new file mode 100644 index 00000000000000..0905dcb0830192 --- /dev/null +++ b/tools/labs/templates/debugging/locking2/locking2.c @@ -0,0 +1,30 @@ +#include +#include + +static DEFINE_SPINLOCK(lock); + +static void timerfn(struct timer_list *unused) +{ + pr_info("%s acquiring lock\n", __func__); + spin_lock(&lock); pr_info("%s acquired lock\n", __func__); + spin_unlock(&lock); pr_info("%s released lock\n", __func__); +} + +static DEFINE_TIMER(timer, timerfn); + +int init_module(void) +{ + mod_timer(&timer, jiffies); + + pr_info("%s acquiring lock\n", __func__); + spin_lock(&lock); pr_info("%s acquired lock\n", __func__); + spin_unlock(&lock); pr_info("%s released lock\n", __func__); + return 0; +} + +void exit_module(void) +{ + del_timer_sync(&timer); +} + +MODULE_LICENSE("GPL v2"); diff --git a/tools/labs/templates/debugging/oops/Kbuild b/tools/labs/templates/debugging/oops/Kbuild new file mode 100644 index 00000000000000..3916b6c818523b --- /dev/null +++ b/tools/labs/templates/debugging/oops/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = oops.o diff --git a/tools/labs/templates/debugging/oops/oops.c b/tools/labs/templates/debugging/oops/oops.c new file mode 100644 index 00000000000000..26390a510a9904 --- /dev/null +++ b/tools/labs/templates/debugging/oops/oops.c @@ -0,0 +1,23 @@ +#include + +static noinline void do_oops(void) +{ + *(int*)0x42 = 'a'; +} + +static int 
so2_oops_init(void) +{ + pr_info("oops_init\n"); + do_oops(); + + return 0; +} + +static void so2_oops_exit(void) +{ + pr_info("oops exit\n"); +} + +MODULE_LICENSE("GPL v2"); +module_init(so2_oops_init); +module_exit(so2_oops_exit); diff --git a/tools/labs/templates/debugging/panic/Kbuild b/tools/labs/templates/debugging/panic/Kbuild new file mode 100644 index 00000000000000..cefb5c9f0de161 --- /dev/null +++ b/tools/labs/templates/debugging/panic/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = panic.o diff --git a/tools/labs/templates/debugging/panic/panic.c b/tools/labs/templates/debugging/panic/panic.c new file mode 100644 index 00000000000000..440ead94d15d2e --- /dev/null +++ b/tools/labs/templates/debugging/panic/panic.c @@ -0,0 +1,29 @@ +#include +#include + +static struct timer_list panic_timer; + +static void do_panic(struct timer_list *unused) +{ + *(int*)0x42 = 'a'; +} + +static int so2_panic_init(void) +{ + pr_info("panic_init\n"); + + timer_setup(&panic_timer, do_panic, 0); + mod_timer(&panic_timer, jiffies + 2 * HZ); + + return 0; +} + +static void so2_panic_exit(void) +{ + pr_info("panic exit\n"); + del_timer_sync(&panic_timer); +} + +MODULE_LICENSE("GPL v2"); +module_init(so2_panic_init); +module_exit(so2_panic_exit); diff --git a/tools/labs/templates/deferred_work/1-2-timer/Kbuild b/tools/labs/templates/deferred_work/1-2-timer/Kbuild new file mode 100644 index 00000000000000..fa3cd3e263f84b --- /dev/null +++ b/tools/labs/templates/deferred_work/1-2-timer/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = timer.o diff --git a/tools/labs/templates/deferred_work/1-2-timer/timer.c b/tools/labs/templates/deferred_work/1-2-timer/timer.c new file mode 100644 index 00000000000000..eba76bffc9135d --- /dev/null +++ b/tools/labs/templates/deferred_work/1-2-timer/timer.c @@ -0,0 +1,54 @@ +/* + * Deferred Work + * + * Exercise #1, #2: simple timer + 
*/ + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple kernel timer"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +#define TIMER_TIMEOUT 1 + +static struct timer_list timer; + +static void timer_handler(struct timer_list *tl) +{ + /* TODO 1/4: print a message */ + static size_t nseconds; + + nseconds += TIMER_TIMEOUT; + pr_info("[timer_handler] nseconds = %d\n", nseconds); + + /* TODO 2: rechedule timer */ + mod_timer(tl, jiffies + TIMER_TIMEOUT * HZ); +} + +static int __init timer_init(void) +{ + pr_info("[timer_init] Init module\n"); + + /* TODO 1: initialize timer */ + timer_setup(&timer, timer_handler, 0); + + /* TODO 1: schedule timer for the first time */ + mod_timer(&timer, jiffies + TIMER_TIMEOUT * HZ); + + return 0; +} + +static void __exit timer_exit(void) +{ + pr_info("[timer_exit] Exit module\n"); + + /* TODO 1: cleanup; make sure the timer is not running after we exit */ + del_timer_sync(&timer); +} + +module_init(timer_init); +module_exit(timer_exit); diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/include/deferred.h b/tools/labs/templates/deferred_work/3-4-5-deferred/include/deferred.h new file mode 100644 index 00000000000000..f9408c704401ad --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/include/deferred.h @@ -0,0 +1,35 @@ +/* + * SO2 - Lab 6 - Deferred Work + * + * Exercises #3, #4, #5: deferred work + * + * Header file. 
+ */ + +#ifndef __DEFERRED_H__ +#define __DEFERRED_H__ + +#include + +#define MY_IOCTL_TIMER_SET _IOW('k', 1, unsigned long) +#define MY_IOCTL_TIMER_CANCEL _IO ('k', 2) +#define MY_IOCTL_TIMER_ALLOC _IOW('k', 3, unsigned long) +#define MY_IOCTL_TIMER_MON _IO ('k', 4) + +/* converts ioctl command code to message */ +inline static char *ioctl_command_to_string(int cmd) +{ + switch(cmd) { + case MY_IOCTL_TIMER_SET: + return "Set timer"; + case MY_IOCTL_TIMER_CANCEL: + return "Cancel timer"; + case MY_IOCTL_TIMER_ALLOC: + return "Allocate memory"; + case MY_IOCTL_TIMER_MON: + return "Monitor pid"; + } + return "Unknown command"; +} + +#endif /* __DEFERRED_H__ */ diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/Kbuild b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/Kbuild new file mode 100644 index 00000000000000..fa3f727c8a53d2 --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = deferred.o diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/deferred.c b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/deferred.c new file mode 100644 index 00000000000000..3b8e4994056dc2 --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/deferred.c @@ -0,0 +1,262 @@ +/* + * SO2 - Lab 6 - Deferred Work + * + * Exercises #3, #4, #5: deferred work + * + * Code skeleton. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../include/deferred.h" + +#define MY_MAJOR 42 +#define MY_MINOR 0 +#define MODULE_NAME "deferred" + +#define TIMER_TYPE_NONE -1 +#define TIMER_TYPE_SET 0 +#define TIMER_TYPE_ALLOC 1 +#define TIMER_TYPE_MON 2 + +MODULE_DESCRIPTION("Deferred work character device"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +struct mon_proc { + struct task_struct *task; + struct list_head list; +}; + +static struct my_device_data { + struct cdev cdev; + /* TODO 1: add timer */ + struct timer_list timer; + /* TODO 2: add flag */ + int flag; + /* TODO 3: add work */ + struct work_struct work; + /* TODO 4: add list for monitored processes */ + struct list_head list; + /* TODO 4: add spinlock to protect list */ + spinlock_t lock; +} dev; + +static void alloc_io(void) +{ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(5 * HZ); + pr_info("Yawn! I've been sleeping for 5 seconds.\n"); +} + +static struct mon_proc *get_proc(pid_t pid) +{ + struct task_struct *task; + struct mon_proc *p; + + rcu_read_lock(); + task = pid_task(find_vpid(pid), PIDTYPE_PID); + rcu_read_unlock(); + if (!task) + return ERR_PTR(-ESRCH); + + p = kmalloc(sizeof(*p), GFP_ATOMIC); + if (!p) + return ERR_PTR(-ENOMEM); + + get_task_struct(task); + p->task = task; + + return p; +} + + +/* TODO 3/4: define work handler */ +static void work_handler(struct work_struct *work) +{ + alloc_io(); +} + +#define ALLOC_IO_DIRECT +/* TODO 3: undef ALLOC_IO_DIRECT*/ +#undef ALLOC_IO_DIRECT + +static void timer_handler(struct timer_list *tl) +{ + /* TODO 1/44: implement timer handler */ + struct my_device_data *my_data = from_timer(my_data, tl, timer); + + pr_info("[timer_handler] pid = %d, comm = %s\n", + current->pid, current->comm); + + /* TODO 2/38: check flags: TIMER_TYPE_SET or TIMER_TYPE_ALLOC */ + switch (my_data->flag) { + case TIMER_TYPE_SET: + break; + case TIMER_TYPE_ALLOC: +#ifdef ALLOC_IO_DIRECT + 
alloc_io(); +#else + /* TODO 3: schedule work */ + schedule_work(&my_data->work); +#endif + break; + case TIMER_TYPE_MON: + { + /* TODO 4/19: iterate the list and check the proccess state */ + struct mon_proc *p, *n; + + spin_lock(&my_data->lock); + list_for_each_entry_safe(p, n, &my_data->list, list) { + /* TODO 4: if task is dead print info ... */ + /* TODO 4: ... decrement task usage counter ... */ + /* TODO 4: ... remove it from the list ... */ + /* TODO 4: ... free the struct mon_proc */ + if (p->task->state == TASK_DEAD) { + pr_info("task %s (%d) is dead\n", p->task->comm, + p->task->pid); + put_task_struct(p->task); + list_del(&p->list); + kfree(p); + } + } + spin_unlock(&my_data->lock); + + mod_timer(&my_data->timer, jiffies + HZ); + break; + } + default: + break; + } +} + +static int deferred_open(struct inode *inode, struct file *file) +{ + struct my_device_data *my_data = + container_of(inode->i_cdev, struct my_device_data, cdev); + file->private_data = my_data; + pr_info("[deferred_open] Device opened\n"); + return 0; +} + +static int deferred_release(struct inode *inode, struct file *file) +{ + pr_info("[deferred_release] Device released\n"); + return 0; +} + +static long deferred_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct my_device_data *my_data = (struct my_device_data*) file->private_data; + + pr_info("[deferred_ioctl] Command: %s\n", ioctl_command_to_string(cmd)); + + switch (cmd) { + case MY_IOCTL_TIMER_SET: + /* TODO 2: set flag */ + my_data->flag = TIMER_TYPE_SET; + /* TODO 1: schedule timer */ + mod_timer(&my_data->timer, jiffies + arg * HZ); + break; + case MY_IOCTL_TIMER_CANCEL: + /* TODO 1: cancel timer */ + del_timer(&my_data->timer); + break; + case MY_IOCTL_TIMER_ALLOC: + /* TODO 2/2: set flag and schedule timer */ + my_data->flag = TIMER_TYPE_ALLOC; + mod_timer(&my_data->timer, jiffies + arg * HZ); + break; + case MY_IOCTL_TIMER_MON: + { + /* TODO 4/8: use get_proc() and add task to list */ + struct 
mon_proc *p = get_proc(arg); + if (IS_ERR(p)) + return PTR_ERR(p); + + /* TODO 4: protect access to list */ + spin_lock_bh(&my_data->lock); + list_add(&p->list, &my_data->list); + spin_unlock_bh(&my_data->lock); + + /* TODO 4/2: set flag and schedule timer */ + my_data->flag = TIMER_TYPE_MON; + mod_timer(&my_data->timer, jiffies + HZ); + break; + } + default: + return -ENOTTY; + } + return 0; +} + +struct file_operations my_fops = { + .owner = THIS_MODULE, + .open = deferred_open, + .release = deferred_release, + .unlocked_ioctl = deferred_ioctl, +}; + +static int deferred_init(void) +{ + int err; + + pr_info("[deferred_init] Init module\n"); + err = register_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), 1, MODULE_NAME); + if (err) { + pr_info("[deffered_init] register_chrdev_region: %d\n", err); + return err; + } + + /* TODO 2: Initialize flag. */ + dev.flag = TIMER_TYPE_NONE; + /* TODO 3: Initialize work. */ + INIT_WORK(&dev.work, work_handler); + + /* TODO 4/2: Initialize lock and list. */ + spin_lock_init(&dev.lock); + INIT_LIST_HEAD(&dev.list); + + cdev_init(&dev.cdev, &my_fops); + cdev_add(&dev.cdev, MKDEV(MY_MAJOR, MY_MINOR), 1); + + /* TODO 1: Initialize timer. */ + timer_setup(&dev.timer, timer_handler, 0); + + return 0; +} + +static void deferred_exit(void) +{ + struct mon_proc *p, *n; + + pr_info("[deferred_exit] Exit module\n" ); + + cdev_del(&dev.cdev); + unregister_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), 1); + + /* TODO 1: Cleanup: make sure the timer is not running after exiting. */ + del_timer_sync(&dev.timer); + /* TODO 3: Cleanup: make sure the work handler is not scheduled. */ + flush_scheduled_work(); + + /* TODO 4/8: Cleanup the monitered process list */ + list_for_each_entry_safe(p, n, &dev.list, list) { + /* TODO 4: ... decrement task usage counter ... */ + /* TODO 4: ... remove it from the list ... */ + /* TODO 4: ... 
free the struct mon_proc */ + put_task_struct(p->task); + list_del(&p->list); + kfree(p); + } +} + +module_init(deferred_init); +module_exit(deferred_exit); diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/makenode b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/makenode new file mode 100755 index 00000000000000..1e46669d4a1c98 --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/kernel/makenode @@ -0,0 +1,9 @@ +#!/bin/sh + +device=deferred +type=c +major=42 +minor=0 + +rm -f /dev/${device} +mknod /dev/${device} $type $major $minor && ls -al /dev/${device} diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/user/.gitignore b/tools/labs/templates/deferred_work/3-4-5-deferred/user/.gitignore new file mode 100644 index 00000000000000..ee4c92682341e4 --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/user/.gitignore @@ -0,0 +1 @@ +/test diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/user/Makefile b/tools/labs/templates/deferred_work/3-4-5-deferred/user/Makefile new file mode 100644 index 00000000000000..62768622c42fee --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/user/Makefile @@ -0,0 +1,9 @@ +CFLAGS=-Wall -m32 +LDFLAGS=-static -m32 + +test: test.o + +.PHONY: clean + +clean: + -rm -f *~ *.o test diff --git a/tools/labs/templates/deferred_work/3-4-5-deferred/user/test.c b/tools/labs/templates/deferred_work/3-4-5-deferred/user/test.c new file mode 100644 index 00000000000000..3cef70f86e2b75 --- /dev/null +++ b/tools/labs/templates/deferred_work/3-4-5-deferred/user/test.c @@ -0,0 +1,93 @@ +/* + * SO2 - Lab 6 - Deferred Work + * + * Exercises #3, #4, #5: deferred work + * + * User-mode test program. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "../include/deferred.h" + +#define DEVICE_PATH "/dev/deferred" + +/* prints error message and exits */ +void error(char *message) +{ + perror(message); + exit(EXIT_FAILURE); +} + +/* prints usage message and exits */ +void usage() +{ + printf("Usage: test \n options:\n" + "\ts - set timer to run after seconds\n" + "\tc - cancel timer\n" + "\ta - allocate memory after seconds\n" + "\tp - monitor pid\n" + "\n"); + exit(1); +} + +#define BUFFER_LEN 128 + +int main(int argc, char **argv) +{ + int fd; + unsigned long seconds, pid; + + if (argc < 2) + usage(); + + fd = open(DEVICE_PATH, O_RDONLY); + if (fd < 0) + error(DEVICE_PATH); + + switch (argv[1][0]) { + case 's': + /* Set timer. */ + if (argc < 3) + usage(); + seconds = atoi(argv[2]); + printf("Set timer to %lu seconds\n", seconds); /* seconds is unsigned long */ + if (ioctl(fd, MY_IOCTL_TIMER_SET, seconds) < 0) + error("ioctl set timer error"); + break; + case 'c': + /* Cancel timer. */ + printf("Cancel timer\n"); + if (ioctl(fd, MY_IOCTL_TIMER_CANCEL) < 0) + error("ioctl cancel timer error"); + break; + case 'a': + /* Allocate memory. */ + if (argc < 3) + usage(); + seconds = atoi(argv[2]); + printf("Allocate memory after %lu seconds\n",seconds); /* seconds is unsigned long */ + if (ioctl(fd, MY_IOCTL_TIMER_ALLOC, seconds) < 0) + error("ioctl allocate memory error"); + break; + case 'p': + /* Monitor pid. 
*/ + if (argc < 3) + usage(); + pid = atoi(argv[2]); + printf("Monitor PID %lu.\n", pid); + if (ioctl(fd, MY_IOCTL_TIMER_MON, pid) < 0) + error("ioctl monitor pid error"); + break; + default: + error("Wrong parameter"); + } + + close(fd); + + return 0; +} diff --git a/tools/labs/templates/deferred_work/6-kthread/Kbuild b/tools/labs/templates/deferred_work/6-kthread/Kbuild new file mode 100644 index 00000000000000..028c060071dd8c --- /dev/null +++ b/tools/labs/templates/deferred_work/6-kthread/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = kthread.o diff --git a/tools/labs/templates/deferred_work/6-kthread/kthread.c b/tools/labs/templates/deferred_work/6-kthread/kthread.c new file mode 100644 index 00000000000000..d47ce9619dce06 --- /dev/null +++ b/tools/labs/templates/deferred_work/6-kthread/kthread.c @@ -0,0 +1,65 @@ +/* + * SO2 - Lab 6 - Deferred Work + * + * Exercise #6: kernel thread + */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple kernel thread"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +wait_queue_head_t wq_stop_thread; +atomic_t flag_stop_thread; +wait_queue_head_t wq_thread_terminated; +atomic_t flag_thread_terminated; + + +int my_thread_f(void *data) +{ + pr_info("[my_thread_f] Current process id is %d (%s)\n", + current->pid, current->comm); + /* TODO: Wait for command to remove module on wq_stop_thread queue. 
*/ + wait_event_interruptible(wq_stop_thread, atomic_read(&flag_stop_thread) != 0); + + /* TODO: set flag to mark kernel thread termination */ + atomic_set(&flag_thread_terminated, 1); + /* TODO: notify the unload process that we have exited */ + wake_up_interruptible(&wq_thread_terminated); + pr_info("[my_thread_f] Exiting\n"); + do_exit(0); +} + +static int __init kthread_init(void) +{ + pr_info("[kthread_init] Init module\n"); + + /* TODO/4: init the waitqueues and flags */ + init_waitqueue_head(&wq_stop_thread); + atomic_set(&flag_stop_thread, 0); + init_waitqueue_head(&wq_thread_terminated); + atomic_set(&flag_thread_terminated, 0); + /* TODO: create and start the kernel thread */ + kthread_run(my_thread_f, NULL, "%skthread%d", "my", 0); + + return 0; +} + +static void __exit kthread_exit(void) +{ + /* TODO/2: notify the kernel thread that its time to exit */ + atomic_set(&flag_stop_thread, 1); + wake_up_interruptible(&wq_stop_thread); + /* TODO: wait for the kernel thread to exit */ + wait_event_interruptible(wq_thread_terminated, atomic_read(&flag_thread_terminated) != 0); + pr_info("[kthread_exit] Exit module\n"); +} + +module_init(kthread_init); +module_exit(kthread_exit); diff --git a/tools/labs/templates/device_drivers/extra/char-driver-lin/Kbuild b/tools/labs/templates/device_drivers/extra/char-driver-lin/Kbuild new file mode 100644 index 00000000000000..9399052a035415 --- /dev/null +++ b/tools/labs/templates/device_drivers/extra/char-driver-lin/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = modul.o diff --git a/tools/labs/templates/device_drivers/extra/char-driver-lin/makenode b/tools/labs/templates/device_drivers/extra/char-driver-lin/makenode new file mode 100755 index 00000000000000..e66abd3c6a1cd0 --- /dev/null +++ b/tools/labs/templates/device_drivers/extra/char-driver-lin/makenode @@ -0,0 +1,9 @@ +#!/bin/sh +device="my_device" +major=42 + + +rm -f /dev/${device} +mknod /dev/${device} c 
$major 0 + + diff --git a/tools/labs/templates/device_drivers/extra/char-driver-lin/modul.c b/tools/labs/templates/device_drivers/extra/char-driver-lin/modul.c new file mode 100644 index 00000000000000..b92e74fabfded6 --- /dev/null +++ b/tools/labs/templates/device_drivers/extra/char-driver-lin/modul.c @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include + +#define MY_MAJOR 42 +#define MY_MAX_MINORS 2 +/* #define IOCTL_IN _IOC(_IOC_WRITE, 'k', 1, sizeof(my_ioctl_data)) */ +#define MY_IOCTL_IN _IOC(_IOC_WRITE, 'k', 1, 0) + +struct my_device_data { + struct cdev cdev; + /* my data starts here */ +}devs[MY_MAX_MINORS]; + +MODULE_DESCRIPTION("My kernel module"); +MODULE_AUTHOR("Me"); +MODULE_LICENSE("GPL"); + +static int my_open(struct inode *inode, struct file *file) { + struct my_device_data *my_data = + container_of(inode->i_cdev, struct my_device_data, cdev); + + printk( KERN_DEBUG "[my_open]\n" ); + /* validate access to device */ + file->private_data = my_data; + /* initialize device */ + + return 0; +} + +static int my_close(struct inode *inode, struct file *file) { + printk( KERN_DEBUG "[my_close]\n" ); + /* deinitialize device */ + return 0; +} + +static int my_read(struct file *file, char __user *user_buffer, size_t size, loff_t *offset) { + struct my_device_data *my_data = + (struct my_device_data*) file->private_data; + int sizeRead = 0; + + printk( KERN_DEBUG "[my_read]\n" ); + /* read data from device in my_data->buffer */ + /* if(copy_to_user(user_buffer, my_data->buffer, my_data->size)) + return -EFAULT; */ + + return sizeRead; +} + +static int my_write(struct file *file, const char __user *user_buffer, size_t size, loff_t *offset) { + struct my_device_data *my_data = + (struct my_device_data*) file->private_data; + int sizeWritten = 0; + + printk( KERN_DEBUG "[my_write]\n" ); + /* copy_from_user */ + /* write data to device from my_data->buffer */ + sizeWritten = size; //only if sizeWritten == size ! 
+ + return sizeWritten; +} + +static long my_ioctl (struct file *file, unsigned int cmd, unsigned long arg) { + struct my_device_data *my_data = + (struct my_device_data*) file->private_data; + /* my_ioctl_data mid; */ + + printk( KERN_DEBUG "[my_ioctl]\n" ); + switch(cmd) { + case MY_IOCTL_IN: + /* if( copy_from_user(&mid, (my_ioctl_data *) arg, sizeof(my_ioctl_data)) ) + return -EFAULT; */ + + /* process data and execute command */ + break; + default: + return -ENOTTY; + } + return 0; +} + + +struct file_operations my_fops = { + .owner = THIS_MODULE, + .open = my_open, + .read = my_read, + .write = my_write, + .release = my_close, + .unlocked_ioctl = my_ioctl +}; + +int init_module(void) { + int i, err; + + printk( KERN_DEBUG "[init_module]\n" ); + err = register_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS,"my_device_driver"); + if (err != 0) { + /* report error */ + return err; + } + + for(i = 0; i < MY_MAX_MINORS; i++) { + /* initialize devs[i] fields */ + cdev_init(&devs[i].cdev, &my_fops); + cdev_add(&devs[i].cdev, MKDEV(MY_MAJOR, i), 1); + } + + return 0; +} + +void cleanup_module(void) { + int i; + + printk( KERN_DEBUG "[cleanup_module]\n" ); + for(i = 0; i < MY_MAX_MINORS; i++) { + /* release devs[i] fields */ + cdev_del(&devs[i].cdev); + } + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS); +} + diff --git a/tools/labs/templates/device_drivers/include/so2_cdev.h b/tools/labs/templates/device_drivers/include/so2_cdev.h new file mode 100644 index 00000000000000..a88a8809dc5ab0 --- /dev/null +++ b/tools/labs/templates/device_drivers/include/so2_cdev.h @@ -0,0 +1,14 @@ +#ifndef __PSOCDEV_H__ +#define __PSOCDEV_H__ 1 + +#include + +#define BUFFER_SIZE 256 + +#define MY_IOCTL_PRINT _IOC(_IOC_NONE, 'k', 1, 0) +#define MY_IOCTL_SET_BUFFER _IOC(_IOC_WRITE, 'k', 2, BUFFER_SIZE) +#define MY_IOCTL_GET_BUFFER _IOC(_IOC_READ, 'k', 3, BUFFER_SIZE) +#define MY_IOCTL_DOWN _IOC(_IOC_NONE, 'k', 4, 0) +#define MY_IOCTL_UP _IOC(_IOC_NONE, 'k', 5, 0) + +#endif 
diff --git a/tools/labs/templates/device_drivers/kernel/Kbuild b/tools/labs/templates/device_drivers/kernel/Kbuild new file mode 100644 index 00000000000000..462ca0f2a4d467 --- /dev/null +++ b/tools/labs/templates/device_drivers/kernel/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = so2_cdev.o diff --git a/tools/labs/templates/device_drivers/kernel/so2_cdev.c b/tools/labs/templates/device_drivers/kernel/so2_cdev.c new file mode 100644 index 00000000000000..e768b8d0777337 --- /dev/null +++ b/tools/labs/templates/device_drivers/kernel/so2_cdev.c @@ -0,0 +1,245 @@ +/* + * Character device drivers lab + * + * All tasks + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../include/so2_cdev.h" + +MODULE_DESCRIPTION("SO2 character device"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +#define LOG_LEVEL KERN_INFO + +#define MY_MAJOR 42 +#define MY_MINOR 0 +#define NUM_MINORS 1 +#define MODULE_NAME "so2_cdev" +#define MESSAGE "hello\n" +#define IOCTL_MESSAGE "Hello ioctl" + +#ifndef BUFSIZ +#define BUFSIZ 4096 +#endif + + +struct so2_device_data { + /* TODO 2/1: add cdev member */ + struct cdev cdev; + /* TODO 4/2: add buffer with BUFSIZ elements */ + char buffer[BUFSIZ]; + size_t size; + /* TODO 7/2: extra members for home */ + wait_queue_head_t wq; + int flag; + /* TODO 3/1: add atomic_t access variable to keep track if file is opened */ + atomic_t access; +}; + +struct so2_device_data devs[NUM_MINORS]; + +static int so2_cdev_open(struct inode *inode, struct file *file) +{ + struct so2_device_data *data; + + /* TODO 2/1: print message when the device file is open. 
*/ + printk(LOG_LEVEL "open called!\n"); + + /* TODO 3/1: inode->i_cdev contains our cdev struct, use container_of to obtain a pointer to so2_device_data */ + data = container_of(inode->i_cdev, struct so2_device_data, cdev); + + file->private_data = data; + +#ifndef EXTRA + /* TODO 3/2: return immediately if access is != 0, use atomic_cmpxchg */ + if (atomic_cmpxchg(&data->access, 0, 1) != 0) + return -EBUSY; +#endif + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(10 * HZ); + + return 0; +} + +static int +so2_cdev_release(struct inode *inode, struct file *file) +{ + /* TODO 2/1: print message when the device file is closed. */ + printk(LOG_LEVEL "close called!\n"); + +#ifndef EXTRA + struct so2_device_data *data = + (struct so2_device_data *) file->private_data; + + /* TODO 3/1: reset access variable to 0, use atomic_set */ + atomic_set(&data->access, 0); +#endif + return 0; +} + +static ssize_t +so2_cdev_read(struct file *file, + char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct so2_device_data *data = + (struct so2_device_data *) file->private_data; + size_t to_read; + +#ifdef EXTRA + /* TODO 7/6: extra tasks for home */ + if (!data->size) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + if (wait_event_interruptible(data->wq, data->size != 0)) + return -ERESTARTSYS; + } +#endif + + /* TODO 4/4: Copy data->buffer to user_buffer, use copy_to_user */ + to_read = (*offset >= data->size) ? 0 : min_t(size_t, size, data->size - *offset); /* 0 bytes (EOF) at or past end of data; avoids a negative length wrapping to a huge size_t */ + if (copy_to_user(user_buffer, data->buffer + *offset, to_read) != 0) + return -EFAULT; + *offset += to_read; + + return to_read; +} + +static ssize_t +so2_cdev_write(struct file *file, + const char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct so2_device_data *data = + (struct so2_device_data *) file->private_data; + + + /* TODO 5/5: copy user_buffer to data->buffer, use copy_from_user */ + size = (*offset + size > BUFSIZ) ? 
(BUFSIZ - *offset) : size; + if (copy_from_user(data->buffer + *offset, user_buffer, size) != 0) + return -EFAULT; + *offset += size; + data->size = *offset; + /* TODO 7/3: extra tasks for home */ +#ifdef EXTRA + wake_up_interruptible(&data->wq); +#endif + + return size; +} + +static long +so2_cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct so2_device_data *data = + (struct so2_device_data *) file->private_data; + int ret = 0; + int remains; + + switch (cmd) { + /* TODO 6/3: if cmd = MY_IOCTL_PRINT, display IOCTL_MESSAGE */ + case MY_IOCTL_PRINT: + printk(LOG_LEVEL "%s\n", IOCTL_MESSAGE); + break; + /* TODO 7/19: extra tasks, for home */ + case MY_IOCTL_DOWN: + data->flag = 0; + ret = wait_event_interruptible(data->wq, data->flag != 0); + break; + case MY_IOCTL_UP: + data->flag = 1; + wake_up_interruptible(&data->wq); + break; + case MY_IOCTL_SET_BUFFER: + remains = copy_from_user(data->buffer, (char __user *)arg, + BUFFER_SIZE); + if (remains) + ret = -EFAULT; + data->size = BUFFER_SIZE - remains; + break; + case MY_IOCTL_GET_BUFFER: + if (copy_to_user((char __user *)arg, data->buffer, min_t(size_t, data->size, BUFFER_SIZE))) /* data->size may reach BUFSIZ after write(); the ioctl payload is only BUFFER_SIZE bytes */ + ret = -EFAULT; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static const struct file_operations so2_fops = { + .owner = THIS_MODULE, +/* TODO 2/2: add open and release functions */ + .open = so2_cdev_open, + .release = so2_cdev_release, +/* TODO 4/1: add read function */ + .read = so2_cdev_read, +/* TODO 5/1: add write function */ + .write = so2_cdev_write, +/* TODO 6/1: add ioctl function */ + .unlocked_ioctl = so2_cdev_ioctl, +}; + +static int so2_cdev_init(void) +{ + int err; + int i; + + /* TODO 1/6: register char device region for MY_MAJOR and NUM_MINORS starting at MY_MINOR */ + err = register_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), + NUM_MINORS, MODULE_NAME); + if (err != 0) { + pr_info("register_chrdev_region"); + return err; + } + + for (i = 0; i < NUM_MINORS; i++) { +#ifdef EXTRA + /* TODO 7/2: extra tasks, 
for home */ + devs[i].size = 0; + memset(devs[i].buffer, 0, sizeof(devs[i].buffer)); +#else + /*TODO 4/2: initialize buffer with MESSAGE string */ + memcpy(devs[i].buffer, MESSAGE, sizeof(MESSAGE)); + devs[i].size = sizeof(MESSAGE); + /* TODO 3/1: set access variable to 0, use atomic_set */ + atomic_set(&devs[i].access, 0); +#endif + /* TODO 7/2: extra tasks for home */ + init_waitqueue_head(&devs[i].wq); + devs[i].flag = 0; + /* TODO 2/2: init and add cdev to kernel core */ + cdev_init(&devs[i].cdev, &so2_fops); + cdev_add(&devs[i].cdev, MKDEV(MY_MAJOR, i), 1); + } + + return 0; +} + +static void so2_cdev_exit(void) +{ + int i; + + for (i = 0; i < NUM_MINORS; i++) { + /* TODO 2/1: delete cdev from kernel core */ + cdev_del(&devs[i].cdev); + } + + /* TODO 1/1: unregister char device region, for MY_MAJOR and NUM_MINORS starting at MY_MINOR */ + unregister_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), NUM_MINORS); +} + +module_init(so2_cdev_init); +module_exit(so2_cdev_exit); diff --git a/tools/labs/templates/device_drivers/user/Makefile b/tools/labs/templates/device_drivers/user/Makefile new file mode 100644 index 00000000000000..8b4ace8e72fe9f --- /dev/null +++ b/tools/labs/templates/device_drivers/user/Makefile @@ -0,0 +1,7 @@ +all: so2_cdev_test.c + gcc -m32 -static -o so2_cdev_test so2_cdev_test.c + +.PHONY: clean + +clean: + -rm -f *~ *.o so2_cdev_test diff --git a/tools/labs/templates/device_drivers/user/so2_cdev_test b/tools/labs/templates/device_drivers/user/so2_cdev_test new file mode 100755 index 00000000000000..e8e7a2e86967b3 Binary files /dev/null and b/tools/labs/templates/device_drivers/user/so2_cdev_test differ diff --git a/tools/labs/templates/device_drivers/user/so2_cdev_test.c b/tools/labs/templates/device_drivers/user/so2_cdev_test.c new file mode 100644 index 00000000000000..24a1f39d6898ea --- /dev/null +++ b/tools/labs/templates/device_drivers/user/so2_cdev_test.c @@ -0,0 +1,125 @@ +/* + * SO2 Lab - Linux device drivers (#4) + * User-space test file 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "../include/so2_cdev.h" + +#define DEVICE_PATH "/dev/so2_cdev" + +/* + * prints error message and exits + */ + +static void error(const char *message) +{ + perror(message); + exit(EXIT_FAILURE); +} + +/* + * print use case + */ + +static void usage(const char *argv0) +{ + printf("Usage: %s \n options:\n" + "\tp - print\n" + "\ts string - set buffer\n" + "\tg - get buffer\n" + "\td - down\n" + "\tu - up\n" + "\tn - open with O_NONBLOCK and read data\n", argv0); + exit(EXIT_FAILURE); +} + +/* + * Sample run: + * ./so2_cdev_test p ; print ioctl message + * ./so2_cdev_test d ; wait on wait_queue + * ./so2_cdev_test u ; wake up waiters on wait_queue + */ + +int main(int argc, char **argv) +{ + int fd; + char buffer[BUFFER_SIZE] = { 0 }; /* zeroed so partially filled results print cleanly */ + + if (argc < 2) + usage(argv[0]); + + if (strlen(argv[1]) != 1) + usage(argv[0]); + + fd = open(DEVICE_PATH, O_RDONLY); + if (fd < 0) { + perror("open"); + exit(EXIT_FAILURE); + } + + switch (argv[1][0]) { + case 'p': /* print */ + if (ioctl(fd, MY_IOCTL_PRINT, 0) < 0) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + + break; + case 's': /* set buffer */ + if (argc < 3) + usage(argv[0]); + memset(buffer, 0, BUFFER_SIZE); + strncpy(buffer, argv[2], BUFFER_SIZE - 1); /* keep the final byte NUL: strncpy does not terminate on truncation */ + if (ioctl(fd, MY_IOCTL_SET_BUFFER, buffer) < 0) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + break; + case 'g': /* get buffer */ + if (ioctl(fd, MY_IOCTL_GET_BUFFER, buffer) < 0) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + buffer[BUFFER_SIZE-1] = 0; + printf("IOCTL buffer contains %s\n", buffer); + break; + case 'd': /* down */ + if (ioctl(fd, MY_IOCTL_DOWN, 0) < 0) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + break; + case 'u': /* up */ + if (ioctl(fd, MY_IOCTL_UP, 0) < 0) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + break; + case 'n': + if (fcntl(fd, F_SETFL, O_RDONLY | O_NONBLOCK) < 0) { + perror("fcntl"); + exit(EXIT_FAILURE); + } + + if (read(fd, buffer, BUFFER_SIZE) < 0) { + 
perror("read"); + exit(EXIT_FAILURE); + } + buffer[BUFFER_SIZE-1] = 0; + printf("Device buffer contains %s\n", buffer); + break; + default: + error("Wrong parameter"); + } + + close(fd); + + return 0; +} diff --git a/tools/labs/templates/device_model/Kbuild b/tools/labs/templates/device_model/Kbuild new file mode 100644 index 00000000000000..984571ae0c7f53 --- /dev/null +++ b/tools/labs/templates/device_model/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = bex.o bex_misc.o diff --git a/tools/labs/templates/device_model/bex.c b/tools/labs/templates/device_model/bex.c new file mode 100644 index 00000000000000..4419b2389bf7a7 --- /dev/null +++ b/tools/labs/templates/device_model/bex.c @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include +#include + +#include "bex.h" + +MODULE_AUTHOR ("Kernel Hacker"); +MODULE_LICENSE ("GPL"); +MODULE_DESCRIPTION ("BEX bus module"); + +static int bex_match(struct device *dev, struct device_driver *driver) +{ + /* TODO 5/5: implement the bus match function */ + struct bex_device *bex_dev = to_bex_device(dev); + struct bex_driver *bex_drv = to_bex_driver(driver); + + if (!strcmp(bex_dev->type, bex_drv->type)) + return 1; + + return 0; +} + +static int bex_probe(struct device *dev) +{ + struct bex_device *bex_dev = to_bex_device(dev); + struct bex_driver *bex_drv = to_bex_driver(dev->driver); + + return bex_drv->probe(bex_dev); +} + +static int bex_remove(struct device *dev) +{ + struct bex_device *bex_dev = to_bex_device(dev); + struct bex_driver *bex_drv = to_bex_driver(dev->driver); + + bex_drv->remove(bex_dev); + return 0; +} + +static int bex_add_dev(const char *name, const char *type, int version); + +/* TODO 3/14: implement write only add attribute */ +static ssize_t +add_store(struct bus_type *bt, const char *buf, size_t count) +{ + char type[32], name[32]; + int version; + int ret; + + ret = sscanf(buf, "%31s %31s %d", name, type, &version); + if (ret != 3) 
+ return -EINVAL; + + return bex_add_dev(name, type, version) ? : count; +} +BUS_ATTR(add, S_IWUSR, NULL, add_store); + +static int bex_del_dev(const char *name); + +/* TODO 3/13: implement write only del attribute */ +static ssize_t +del_store(struct bus_type *bt, const char *buf, size_t count) +{ + char name[32]; + int version; + + if (sscanf(buf, "%31s", name) != 1) /* bounded to sizeof(name) - 1, as in add_store */ + return -EINVAL; + + return bex_del_dev(name) ? : count; /* propagate the error code; returning 0 would make userspace retry the write */ + +} +BUS_ATTR(del, S_IWUSR, NULL, del_store); + +static struct attribute *bex_bus_attrs[] = { + /* TODO 3/2: add del and add attributes */ + &bus_attr_add.attr, + &bus_attr_del.attr, + NULL +}; +ATTRIBUTE_GROUPS(bex_bus); + +struct bus_type bex_bus_type = { + .name = "bex", + .match = bex_match, + .probe = bex_probe, + .remove = bex_remove, + /* TODO 3: add bus groups attributes */ + .bus_groups = bex_bus_groups, +}; + +/*TODO 2/8: add read-only device attribute to show the type */ +static ssize_t +type_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bex_device *bex_dev = to_bex_device(dev); + + return sprintf(buf, "%s\n", bex_dev->type); +} +DEVICE_ATTR_RO(type); + +/*TODO 2/8: add read-only device attribute to show the version */ +static ssize_t +version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bex_device *bex_dev = to_bex_device(dev); + + return sprintf(buf, "%d\n", bex_dev->version); +} +DEVICE_ATTR_RO(version); + +static struct attribute *bex_dev_attrs[] = { + /* TODO 2/2: add type and version attributes */ + &dev_attr_type.attr, + &dev_attr_version.attr, + NULL +}; +ATTRIBUTE_GROUPS(bex_dev); + +static int bex_dev_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return add_uevent_var(env, "MODALIAS=bex:%s", dev_name(dev)); +} + +static void bex_dev_release(struct device *dev) +{ + struct bex_device *bex_dev = to_bex_device(dev); + + kfree(bex_dev->type); + kfree(bex_dev); +} + +struct device_type bex_device_type = { + /* TODO 2: set the device groups 
attributes */ + .groups = bex_dev_groups, + .uevent = bex_dev_uevent, + .release = bex_dev_release, +}; + +static int bex_add_dev(const char *name, const char *type, int version) +{ + struct bex_device *bex_dev; + + bex_dev = kzalloc(sizeof(*bex_dev), GFP_KERNEL); + if (!bex_dev) + return -ENOMEM; + + bex_dev->type = kstrdup(type, GFP_KERNEL); + bex_dev->version = version; + + bex_dev->dev.bus = &bex_bus_type; + bex_dev->dev.type = &bex_device_type; + bex_dev->dev.parent = NULL; + + dev_set_name(&bex_dev->dev, "%s", name); + + return device_register(&bex_dev->dev); +} + +static int bex_del_dev(const char *name) +{ + struct device *dev; + + dev = bus_find_device_by_name(&bex_bus_type, NULL, name); + if (!dev) + return -EINVAL; + + device_unregister(dev); + put_device(dev); + + return 0; +} + +int bex_register_driver(struct bex_driver *drv) +{ + int ret; + + drv->driver.bus = &bex_bus_type; + ret = driver_register(&drv->driver); + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL(bex_register_driver); + +void bex_unregister_driver(struct bex_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL(bex_unregister_driver); + +static int __init my_bus_init (void) +{ + int ret; + + /* TODO 1/5: register the bus driver */ + ret = bus_register(&bex_bus_type); + if (ret < 0) { + pr_err("Unable to register bus\n"); + return ret; + } + + /* TODO 1: add a device */ + bex_add_dev("root", "none", 1); + + return 0; +} + +static void my_bus_exit (void) +{ + /* TODO 1: unregister the bus driver */ + bus_unregister(&bex_bus_type); +} + +module_init (my_bus_init); +module_exit (my_bus_exit); diff --git a/tools/labs/templates/device_model/bex.h b/tools/labs/templates/device_model/bex.h new file mode 100644 index 00000000000000..ba914bead634b6 --- /dev/null +++ b/tools/labs/templates/device_model/bex.h @@ -0,0 +1,28 @@ +#ifndef _BEX_H +#define _BEX_H + +#include + +struct bex_device { + const char *type; + int version; + struct device dev; +}; + +#define 
to_bex_device(_dev) container_of(_dev, struct bex_device, dev) + +struct bex_driver { + const char *type; + + int (*probe)(struct bex_device *dev); + void (*remove)(struct bex_device *dev); + + struct device_driver driver; +}; + +#define to_bex_driver(drv) container_of(drv, struct bex_driver, driver) + +int bex_register_driver(struct bex_driver *drv); +void bex_unregister_driver(struct bex_driver *drv); + +#endif diff --git a/tools/labs/templates/device_model/bex_misc.c b/tools/labs/templates/device_model/bex_misc.c new file mode 100644 index 00000000000000..c28d0e85815f10 --- /dev/null +++ b/tools/labs/templates/device_model/bex_misc.c @@ -0,0 +1,153 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "bex.h" + +MODULE_DESCRIPTION("BEX misc driver"); +MODULE_AUTHOR("Kernel Hacker"); +MODULE_LICENSE("GPL"); + +#define BUF_SIZE 1024 + +struct bex_misc_device { + struct miscdevice misc; + struct bex_device *dev; + char buf[BUF_SIZE]; +}; + +static int my_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int my_release(struct inode *inode, struct file *file) +{ + return 0; +} + +static int my_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct bex_misc_device *bmd = (struct bex_misc_device *)file->private_data; + ssize_t len = min(sizeof(bmd->buf) - (ssize_t)*offset, size); + /* an offset beyond buf makes the unsigned subtraction above wrap, so reject it explicitly */ + if (*offset < 0 || *offset >= BUF_SIZE || len <= 0) + return 0; + + if (copy_to_user(user_buffer, bmd->buf + *offset, len)) + return -EFAULT; + + *offset += len; + return len; +} + +static int my_write(struct file *file, const char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct bex_misc_device *bmd = (struct bex_misc_device *)file->private_data; + ssize_t len = min(sizeof(bmd->buf) - (ssize_t)*offset, size); + /* an offset beyond buf makes the unsigned subtraction above wrap, so reject it explicitly */ + if (*offset < 0 || *offset >= BUF_SIZE || len <= 0) + return 0; + + if (copy_from_user(bmd->buf + *offset, user_buffer, len)) + return -EFAULT; + + *offset += len; + return len; +} + +struct file_operations bex_misc_fops = { + .owner =
THIS_MODULE, + .open = my_open, + .read = my_read, + .write = my_write, + .release = my_release, +}; + +static int bex_misc_count; + +int bex_misc_probe(struct bex_device *dev) +{ + struct bex_misc_device *bmd; + char buf[32]; + int ret; + + dev_info(&dev->dev, "%s: %s %d\n", __func__, dev->type, dev->version); + + /* TODO 6/4: refuse the probe is version > 1 */ + if (dev->version > 1) { + dev_info(&dev->dev, "unknown version: %d\n", dev->version); + return -ENODEV; + } + + bmd = kzalloc(sizeof(*bmd), GFP_KERNEL); + if (!bmd) + return -ENOMEM; + + bmd->misc.minor = MISC_DYNAMIC_MINOR; + snprintf(buf, sizeof(buf), "bex-misc-%d", bex_misc_count++); + bmd->misc.name = kstrdup(buf, GFP_KERNEL); + bmd->misc.parent = &dev->dev; + bmd->misc.fops = &bex_misc_fops; + bmd->dev = dev; + dev_set_drvdata(&dev->dev, bmd); + + /* TODO 6/5: register the misc device */ + ret = misc_register(&bmd->misc); + if (ret) { + dev_err(&dev->dev, "failed to register misc device: %d\n", ret); + return ret; + } + + return 0; +} + +void bex_misc_remove(struct bex_device *dev) +{ + struct bex_misc_device *bmd; + + bmd = (struct bex_misc_device *)dev_get_drvdata(&dev->dev); + + /* TODO 6: deregister the misc device */ + misc_deregister(&bmd->misc); + kfree(bmd); +} + +struct bex_driver bex_misc_driver = { + .type = "misc", + .probe = bex_misc_probe, + .remove = bex_misc_remove, + .driver = { + .owner = THIS_MODULE, + .name = "bex_misc", + }, +}; + +static int my_init(void) +{ + int err; + + /* TODO 4/5: register the driver */ + err = bex_register_driver(&bex_misc_driver); + if(err) { + pr_err("unable to register driver: %d\n", err); + return err; + } + + return 0; +} + +static void my_exit(void) +{ + /* TODO 4: unregister the driver */ + bex_unregister_driver(&bex_misc_driver); +} + +module_init(my_init); +module_exit(my_exit); diff --git a/tools/labs/templates/device_model/makenode b/tools/labs/templates/device_model/makenode new file mode 100755 index 00000000000000..feaa232c2bfd35 --- 
/dev/null +++ b/tools/labs/templates/device_model/makenode @@ -0,0 +1,8 @@ +#!/bin/sh +device="echodev" +major=42 +minor=0 + +rm -f /dev/${device} +mknod /dev/${device} c $major $minor + diff --git a/tools/labs/templates/filesystems/minfs/kernel/Kbuild b/tools/labs/templates/filesystems/minfs/kernel/Kbuild new file mode 100644 index 00000000000000..b243430771727e --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/kernel/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g -Wno-unused + +obj-m = minfs.o diff --git a/tools/labs/templates/filesystems/minfs/kernel/minfs.c b/tools/labs/templates/filesystems/minfs/kernel/minfs.c new file mode 100644 index 00000000000000..e946c0640803ab --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/kernel/minfs.c @@ -0,0 +1,621 @@ +/* + * SO2 Lab - Filesystem drivers + * Exercise #2 (dev filesystem) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "minfs.h" + +MODULE_DESCRIPTION("Simple filesystem"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +#define LOG_LEVEL KERN_ALERT + + +struct minfs_sb_info { + __u8 version; + unsigned long imap; + struct buffer_head *sbh; +}; + +struct minfs_inode_info { + __u16 data_block; + struct inode vfs_inode; +}; + +/* declarations of functions that are part of operation structures */ + +static int minfs_readdir(struct file *filp, struct dir_context *ctx); +static struct dentry *minfs_lookup(struct inode *dir, + struct dentry *dentry, unsigned int flags); +static int minfs_create(struct inode *dir, struct dentry *dentry, + umode_t mode, bool excl); + +/* dir and inode operation structures */ + +static const struct file_operations minfs_dir_operations = { + .read = generic_read_dir, + .iterate = minfs_readdir, +}; + +static const struct inode_operations minfs_dir_inode_operations = { + .lookup = minfs_lookup, + /* TODO 7/1: Use minfs_create as the create function. 
*/ + .create = minfs_create, +}; + +static const struct address_space_operations minfs_aops = { + .readpage = simple_readpage, + .write_begin = simple_write_begin, + .write_end = simple_write_end, +}; + +static const struct file_operations minfs_file_operations = { + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .mmap = generic_file_mmap, + .llseek = generic_file_llseek, +}; + +static const struct inode_operations minfs_file_inode_operations = { + .getattr = simple_getattr, +}; + +static struct inode *minfs_iget(struct super_block *s, unsigned long ino) +{ + struct minfs_inode *mi; + struct buffer_head *bh; + struct inode *inode; + struct minfs_inode_info *mii; + + /* Allocate VFS inode. */ + inode = iget_locked(s, ino); + if (inode == NULL) { + printk(LOG_LEVEL "error aquiring inode\n"); + return ERR_PTR(-ENOMEM); + } + + /* Return inode from cache */ + if (!(inode->i_state & I_NEW)) + return inode; + + /* TODO 4/2: Read block with inodes. It's the second block on + * the device, i.e. the block with the index 1. This is the index + * to be passed to sb_bread(). + */ + if (!(bh = sb_bread(s, MINFS_INODE_BLOCK))) + goto out_bad_sb; + + /* TODO 4/1: Get inode with index ino from the block. */ + mi = ((struct minfs_inode *) bh->b_data) + ino; + + /* TODO 4/6: fill VFS inode */ + inode->i_mode = mi->mode; + i_uid_write(inode, mi->uid); + i_gid_write(inode, mi->gid); + inode->i_size = mi->size; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + + /* TODO 7/1: Fill address space operations (inode->i_mapping->a_ops) */ + inode->i_mapping->a_ops = &minfs_aops; + + if (S_ISDIR(inode->i_mode)) { + /* TODO 4/2: Fill dir inode operations. */ + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* TODO 5/2: Use minfs_dir_inode_operations for i_op + * and minfs_dir_operations for i_fop. 
*/ + inode->i_op = &minfs_dir_inode_operations; + inode->i_fop = &minfs_dir_operations; + + /* TODO 4/1: Directory inodes start off with i_nlink == 2. + * (use inc_link) */ + inc_nlink(inode); + } + + /* TODO 7/4: Fill inode and file operations for regular files + * (i_op and i_fop). Use the S_ISREG macro. + */ + if (S_ISREG(inode->i_mode)) { + inode->i_op = &minfs_file_inode_operations; + inode->i_fop = &minfs_file_operations; + } + + /* fill data for mii */ + mii = container_of(inode, struct minfs_inode_info, vfs_inode); + + /* TODO 4/1: uncomment after the minfs_inode is initialized */ + mii->data_block = mi->data_block; + //mii->data_block = mi->data_block; + + /* Free resources. */ + /* TODO 4/1: uncomment after the buffer_head is initialized */ + brelse(bh); + //brelse(bh); + unlock_new_inode(inode); + + return inode; + +out_bad_sb: + iget_failed(inode); + return NULL; +} + +static int minfs_readdir(struct file *filp, struct dir_context *ctx) +{ + struct buffer_head *bh; + struct minfs_dir_entry *de; + struct minfs_inode_info *mii; + struct inode *inode; + struct super_block *sb; + int over; + int err = 0; + + /* TODO 5/2: Get inode of directory and container inode. */ + inode = file_inode(filp); + mii = container_of(inode, struct minfs_inode_info, vfs_inode); + + /* TODO 5/1: Get superblock from inode (i_sb). */ + sb = inode->i_sb; + + /* TODO 5/6: Read data block for directory inode. */ + bh = sb_bread(sb, mii->data_block); + if (bh == NULL) { + printk(LOG_LEVEL "could not read block\n"); + err = -ENOMEM; + goto out_bad_sb; + } + + for (; ctx->pos < MINFS_NUM_ENTRIES; ctx->pos++) { + /* TODO 5/1: Data block contains an array of + * "struct minfs_dir_entry". Use `de' for storing. + */ + de = (struct minfs_dir_entry *) bh->b_data + ctx->pos; + + /* TODO 5/3: Step over empty entries (de->ino == 0). */ + if (de->ino == 0) { + continue; + } + + /* + * Use `over` to store return value of dir_emit and exit + * if required. 
+ */ + over = dir_emit(ctx, de->name, MINFS_NAME_LEN, de->ino, + DT_UNKNOWN); + if (over) { + printk(KERN_DEBUG "Read %s from folder %s, ctx->pos: %lld\n", + de->name, + filp->f_path.dentry->d_name.name, + ctx->pos); + ctx->pos++; + goto done; + } + } + +done: + brelse(bh); +out_bad_sb: + return err; +} + +/* + * Find dentry in parent folder. Return parent folder's data buffer_head. + */ + +static struct minfs_dir_entry *minfs_find_entry(struct dentry *dentry, + struct buffer_head **bhp) +{ + struct buffer_head *bh; + struct inode *dir = dentry->d_parent->d_inode; + struct minfs_inode_info *mii = container_of(dir, + struct minfs_inode_info, vfs_inode); + struct super_block *sb = dir->i_sb; + const char *name = dentry->d_name.name; + struct minfs_dir_entry *final_de = NULL; + struct minfs_dir_entry *de; + int i; + + /* TODO 6/6: Read parent folder data block (contains dentries). + * Fill bhp with return value. + */ + bh = sb_bread(sb, mii->data_block); + if (bh == NULL) { + printk(LOG_LEVEL "could not read block\n"); + return NULL; + } + *bhp = bh; + + for (i = 0; i < MINFS_NUM_ENTRIES; i++) { + /* TODO 6/10: Traverse all entries, find entry by name + * Use `de' to traverse. Use `final_de' to store dentry + * found, if existing. + */ + de = ((struct minfs_dir_entry *) bh->b_data) + i; + if (de->ino != 0) { + /* found it */ + if (strcmp(name, de->name) == 0) { + printk(KERN_DEBUG "Found entry %s on position: %zd\n", + name, i); + final_de = de; + break; + } + } + } + + /* bh needs to be released by caller. */ + return final_de; +} + +static struct dentry *minfs_lookup(struct inode *dir, + struct dentry *dentry, unsigned int flags) +{ + /* TODO 6/1: Comment line. 
*/ + // \ + return simple_lookup(dir, dentry, flags); + + struct super_block *sb = dir->i_sb; + struct minfs_dir_entry *de; + struct buffer_head *bh = NULL; + struct inode *inode = NULL; + + dentry->d_op = sb->s_root->d_op; + + de = minfs_find_entry(dentry, &bh); + if (de != NULL) { + printk(KERN_DEBUG "getting entry: name: %s, ino: %d\n", + de->name, de->ino); + inode = minfs_iget(sb, de->ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + } + + d_add(dentry, inode); + brelse(bh); + + printk(KERN_DEBUG "looked up dentry %s\n", dentry->d_name.name); + + return NULL; +} + +static struct inode *minfs_alloc_inode(struct super_block *s) +{ + struct minfs_inode_info *mii; + + /* TODO 3/4: Allocate minfs_inode_info. */ + mii = kzalloc(sizeof(struct minfs_inode_info), GFP_KERNEL); + if (mii == NULL) + return NULL; + + /* TODO 3/1: init VFS inode in minfs_inode_info */ + inode_init_once(&mii->vfs_inode); + + return &mii->vfs_inode; +} + +static void minfs_destroy_inode(struct inode *inode) +{ + /* TODO 3/1: free minfs_inode_info */ + kfree(container_of(inode, struct minfs_inode_info, vfs_inode)); +} + +/* + * Create a new VFS inode. Do basic initialization and fill imap. + */ + +static struct inode *minfs_new_inode(struct inode *dir) +{ + struct super_block *sb = dir->i_sb; + struct minfs_sb_info *sbi = sb->s_fs_info; + struct inode *inode; + int idx; + + /* TODO 7/5: Find first available inode. */ + idx = find_first_zero_bit(&sbi->imap, MINFS_NUM_INODES); + if (idx == MINFS_NUM_INODES) { + printk(LOG_LEVEL "no space left in imap\n"); + return NULL; + } + + /* TODO 7/2: Mark the inode as used in the bitmap and mark + * the superblock buffer head as dirty. + */ + __test_and_set_bit(idx, &sbi->imap); + mark_buffer_dirty(sbi->sbh); + + /* TODO 7/8: Call new_inode(), fill inode fields + * and insert inode into inode hash table. 
+ */ + inode = new_inode(sb); + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_ino = idx; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_blocks = 0; + + insert_inode_hash(inode); + + /* Actual writing to the disk will be done in minfs_write_inode, + * which will be called at a later time. + */ + + return inode; +} + +/* + * Add dentry link on parent inode disk structure. + */ + +static int minfs_add_link(struct dentry *dentry, struct inode *inode) +{ + struct buffer_head *bh; + struct inode *dir; + struct super_block *sb; + struct minfs_inode_info *mii; + struct minfs_dir_entry *de; + int i; + int err = 0; + + /* TODO 7/3: Get: directory inode (in inode); containing inode (in mii); superblock (in sb). */ + dir = dentry->d_parent->d_inode; + mii = container_of(dir, struct minfs_inode_info, vfs_inode); + sb = dir->i_sb; + + /* TODO 7/1: Read dir data block (use sb_bread). */ + bh = sb_bread(sb, mii->data_block); + + /* TODO 7/10: Find first free dentry (de->ino == 0). */ + for (i = 0; i < MINFS_NUM_ENTRIES; i++) { + de = (struct minfs_dir_entry *) bh->b_data + i; + if (de->ino == 0) + break; + } + + if (i == MINFS_NUM_ENTRIES) { + err = -ENOSPC; + goto out; + } + + /* TODO 7/5: Place new entry in the available slot. Mark buffer_head + * as dirty. */ + de->ino = inode->i_ino; + memcpy(de->name, dentry->d_name.name, MINFS_NAME_LEN); + dir->i_mtime = dir->i_ctime = current_time(inode); + + mark_buffer_dirty(bh); + +out: + brelse(bh); + + return err; +} + +/* + * Create a VFS file inode. Use minfs_file_... operations. 
+ */ + +static int minfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) +{ + struct inode *inode; + struct minfs_inode_info *mii; + int err; + + inode = minfs_new_inode(dir); + if (inode == NULL) { + printk(LOG_LEVEL "error allocating new inode\n"); + err = -ENOMEM; + goto err_new_inode; + } + + inode->i_mode = mode; + inode->i_op = &minfs_file_inode_operations; + inode->i_fop = &minfs_file_operations; + mii = container_of(inode, struct minfs_inode_info, vfs_inode); + mii->data_block = MINFS_FIRST_DATA_BLOCK + inode->i_ino; + + err = minfs_add_link(dentry, inode); + if (err != 0) + goto err_add_link; + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + printk(KERN_DEBUG "new file inode created (ino = %lu)\n", + inode->i_ino); + + return 0; + +err_add_link: + inode_dec_link_count(inode); + iput(inode); +err_new_inode: + return err; +} + +/* + * Write VFS inode contents to disk inode. + */ + +static int minfs_write_inode(struct inode *inode, + struct writeback_control *wbc) +{ + struct super_block *sb = inode->i_sb; + struct minfs_inode *mi; + struct minfs_inode_info *mii = container_of(inode, + struct minfs_inode_info, vfs_inode); + struct buffer_head *bh; + int err = 0; + + bh = sb_bread(sb, MINFS_INODE_BLOCK); + if (bh == NULL) { + printk(LOG_LEVEL "could not read block\n"); + err = -ENOMEM; + goto out; + } + + mi = (struct minfs_inode *) bh->b_data + inode->i_ino; + + /* fill disk inode */ + mi->mode = inode->i_mode; + mi->uid = i_uid_read(inode); + mi->gid = i_gid_read(inode); + mi->size = inode->i_size; + mi->data_block = mii->data_block; + + printk(KERN_DEBUG "mode is %05o; data_block is %d\n", mi->mode, + mii->data_block); + + mark_buffer_dirty(bh); + brelse(bh); + + printk(KERN_DEBUG "wrote inode %lu\n", inode->i_ino); + +out: + return err; +} + +static void minfs_put_super(struct super_block *sb) +{ + struct minfs_sb_info *sbi = sb->s_fs_info; + + /* Free superblock buffer head. 
*/ + mark_buffer_dirty(sbi->sbh); + brelse(sbi->sbh); + + printk(KERN_DEBUG "released superblock resources\n"); +} + +static const struct super_operations minfs_ops = { + .statfs = simple_statfs, + .put_super = minfs_put_super, + /* TODO 4/2: add alloc and destroy inode functions */ + .alloc_inode = minfs_alloc_inode, + .destroy_inode = minfs_destroy_inode, + /* TODO 7/1: = set write_inode function. */ + .write_inode = minfs_write_inode, +}; + +static int minfs_fill_super(struct super_block *s, void *data, int silent) +{ + struct minfs_sb_info *sbi; + struct minfs_super_block *ms; + struct inode *root_inode; + struct dentry *root_dentry; + struct buffer_head *bh; + int ret = -EINVAL; + + sbi = kzalloc(sizeof(struct minfs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + s->s_fs_info = sbi; + + /* Set block size for superblock. */ + if (!sb_set_blocksize(s, MINFS_BLOCK_SIZE)) + goto out_bad_blocksize; + + /* TODO 2/3: Read block with superblock. It's the first block on + * the device, i.e. the block with the index 0. This is the index + * to be passed to sb_bread(). + */ + bh = sb_bread(s, MINFS_SUPER_BLOCK); + if (bh == NULL) + goto out_bad_sb; + + /* TODO 2/1: interpret read data as minfs_super_block */ + ms = (struct minfs_super_block *) bh->b_data; + + /* TODO 2/2: check magic number with value defined in minfs.h. jump to out_bad_magic if not suitable */ + if (ms->magic != MINFS_MAGIC) + goto out_bad_magic; + + /* TODO 2/2: fill super_block with magic_number, super_operations */ + s->s_magic = MINFS_MAGIC; + s->s_op = &minfs_ops; + + /* TODO 2/2: Fill sbi with rest of information from disk superblock + * (i.e. version). 
+ */ + sbi->version = ms->version; + sbi->imap = ms->imap; + + /* allocate root inode and root dentry; minfs_iget() may return NULL or an ERR_PTR */ + /* TODO 2/0: use myfs_get_inode instead of minfs_iget */ + root_inode = minfs_iget(s, MINFS_ROOT_INODE); + if (IS_ERR_OR_NULL(root_inode)) + goto out_bad_inode; + + root_dentry = d_make_root(root_inode); + if (!root_dentry) + goto out_iput; + s->s_root = root_dentry; + + /* Store superblock buffer_head for further use. */ + sbi->sbh = bh; + + return 0; + +out_iput: + /* d_make_root() has already dropped root_inode on failure; no iput() here */ +out_bad_inode: + printk(LOG_LEVEL "bad inode\n"); +out_bad_magic: + printk(LOG_LEVEL "bad magic number\n"); + brelse(bh); +out_bad_sb: + printk(LOG_LEVEL "error reading buffer_head\n"); +out_bad_blocksize: + printk(LOG_LEVEL "bad block size\n"); + s->s_fs_info = NULL; + kfree(sbi); + return ret; +} + +static struct dentry *minfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + /* TODO 1/1: call superblock mount function */ + return mount_bdev(fs_type, flags, dev_name, data, minfs_fill_super); +} + +static struct file_system_type minfs_fs_type = { + .owner = THIS_MODULE, + .name = "minfs", + /* TODO 1/3: add mount, kill_sb and fs_flags */ + .mount = minfs_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init minfs_init(void) +{ + int err; + + err = register_filesystem(&minfs_fs_type); + if (err) { + printk(LOG_LEVEL "register_filesystem failed\n"); + return err; + } + + return 0; +} + +static void __exit minfs_exit(void) +{ + unregister_filesystem(&minfs_fs_type); +} + +module_init(minfs_init); +module_exit(minfs_exit); diff --git a/tools/labs/templates/filesystems/minfs/kernel/minfs.h b/tools/labs/templates/filesystems/minfs/kernel/minfs.h new file mode 100644 index 00000000000000..92285597a938ce --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/kernel/minfs.h @@ -0,0 +1,45 @@ +#ifndef _MINFS_H +#define _MINFS_H 1 + +#define MINFS_MAGIC 0xDEADF00D +#define MINFS_NAME_LEN 16 +#define MINFS_BLOCK_SIZE
4096 +#define MINFS_NUM_INODES 32 +#define MINFS_NUM_ENTRIES 32 + +#define MINFS_ROOT_INODE 0 + +/* + * Filesystem layout: + * + * SB IZONE DATA + * ^ ^ (1 block) + * | | + * +-0 +-- 4096 + */ + +#define MINFS_SUPER_BLOCK 0 +#define MINFS_INODE_BLOCK 1 +#define MINFS_FIRST_DATA_BLOCK 2 + +struct minfs_super_block { + unsigned long magic; + __u8 version; + unsigned long imap; +}; + +struct minfs_dir_entry { + __u32 ino; + char name[MINFS_NAME_LEN]; +}; + +/* A minfs inode uses a single block. */ +struct minfs_inode { + __u32 mode; + __u32 uid; + __u32 gid; + __u32 size; + __u16 data_block; +}; + +#endif /* _MINFS_H */ diff --git a/tools/labs/templates/filesystems/minfs/user/.gitignore b/tools/labs/templates/filesystems/minfs/user/.gitignore new file mode 100644 index 00000000000000..970317ec7dd653 --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/.gitignore @@ -0,0 +1 @@ +/mkfs.minfs diff --git a/tools/labs/templates/filesystems/minfs/user/Makefile b/tools/labs/templates/filesystems/minfs/user/Makefile new file mode 100644 index 00000000000000..b8e754a6b70fff --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/Makefile @@ -0,0 +1,13 @@ +CFLAGS = -Wall -g -m32 +LDFLAGS = -static -m32 + +.PHONY: all clean + +all: mkfs.minfs + +mkfs.minfs: mkfs.minfs.o + +mkfs.minfs.o: mkfs.minfs.c ../kernel/minfs.h + +clean: + -rm -f *~ *.o mkfs.minfs diff --git a/tools/labs/templates/filesystems/minfs/user/mkfs.minfs.c b/tools/labs/templates/filesystems/minfs/user/mkfs.minfs.c new file mode 100644 index 00000000000000..c5e8f9132bb2c0 --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/mkfs.minfs.c @@ -0,0 +1,81 @@ +#include +#include +#include + +#include +#include + +#include "../kernel/minfs.h" + +/* + * mk_minfs file + */ + +int main(int argc, char **argv) +{ + FILE *file; + char buffer[MINFS_BLOCK_SIZE]; + struct minfs_super_block msb; + struct minfs_inode root_inode; + struct minfs_inode file_inode; + struct minfs_dir_entry file_dentry; + 
int i; + + if (argc != 2) { + fprintf(stderr, "Usage: %s block_device_name\n", argv[0]); + exit(EXIT_FAILURE); + } + + file = fopen(argv[1], "w+"); + if (file == NULL) { + perror("fopen"); + exit(EXIT_FAILURE); + } + + memset(&msb, 0, sizeof(struct minfs_super_block)); + + msb.magic = MINFS_MAGIC; + msb.version = 1; + msb.imap = 0x03; + + /* zero disk */ + memset(buffer, 0, MINFS_BLOCK_SIZE); + for (i = 0; i < 128; i++) + fwrite(buffer, 1, MINFS_BLOCK_SIZE, file); + + fseek(file, 0, SEEK_SET); + + /* initialize super block */ + fwrite(&msb, sizeof(msb), 1, file); + + /* initialize root inode */ + memset(&root_inode, 0, sizeof(root_inode)); + root_inode.uid = 0; + root_inode.gid = 0; + root_inode.mode = S_IFDIR | 0755; + root_inode.size = 0; + root_inode.data_block = MINFS_FIRST_DATA_BLOCK; + + fseek(file, MINFS_INODE_BLOCK * MINFS_BLOCK_SIZE, SEEK_SET); + fwrite(&root_inode, sizeof(root_inode), 1, file); + + /* initialize new inode */ + memset(&file_inode, 0, sizeof(file_inode)); + file_inode.uid = 0; + file_inode.gid = 0; + file_inode.mode = S_IFREG | 0644; + file_inode.size = 0; + file_inode.data_block = MINFS_FIRST_DATA_BLOCK + 1; + fwrite(&file_inode, sizeof(file_inode), 1, file); + + /* add dentry information */ + memset(&file_dentry, 0, sizeof(file_dentry)); + file_dentry.ino = 1; + memcpy(file_dentry.name, "a.txt", 5); + fseek(file, MINFS_FIRST_DATA_BLOCK * MINFS_BLOCK_SIZE, SEEK_SET); + fwrite(&file_dentry, sizeof(file_dentry), 1, file); + + fclose(file); + + return 0; +} diff --git a/tools/labs/templates/filesystems/minfs/user/test-minfs-0.sh b/tools/labs/templates/filesystems/minfs/user/test-minfs-0.sh new file mode 100755 index 00000000000000..7da9597bc9c2f4 --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/test-minfs-0.sh @@ -0,0 +1,25 @@ +#!/bin/sh + +# load module +insmod ../kernel/minfs.ko + +# create mount point +mkdir -p /mnt/minfs + +# format partition +./mkfs.minfs /dev/vdb + +# mount filesystem +mount -t minfs /dev/vdb /mnt/minfs + 
+# show registered filesystems +cat /proc/filesystems + +# show mounted filesystems +cat /proc/mounts + +# umount filesystem +umount /mnt/minfs + +# unload module +rmmod minfs diff --git a/tools/labs/templates/filesystems/minfs/user/test-minfs-1.sh b/tools/labs/templates/filesystems/minfs/user/test-minfs-1.sh new file mode 100755 index 00000000000000..0a824ae9251c27 --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/test-minfs-1.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +# load module +insmod ../kernel/minfs.ko + +# create mount point +mkdir -p /mnt/minfs + +# format partition +./mkfs.minfs /dev/vdb + +# mount filesystem +mount -t minfs /dev/vdb /mnt/minfs + +# list all filesystem files +cd /mnt/minfs +ls -la + +# unmount filesystem +cd .. +umount /mnt/minfs + +# unload module +rmmod minfs diff --git a/tools/labs/templates/filesystems/minfs/user/test-minfs-2.sh b/tools/labs/templates/filesystems/minfs/user/test-minfs-2.sh new file mode 100755 index 00000000000000..11970dfa0234cc --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/test-minfs-2.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +pushd . > /dev/null 2>&1 + +# load module +insmod ../kernel/minfs.ko + +# create mount point +mkdir -p /mnt/minfs + +# format partition +./mkfs.minfs /dev/vdb + +# mount filesystem +mount -t minfs /dev/vdb /mnt/minfs + +# change to minfs root folder +cd /mnt/minfs + +# create new file +touch b.txt && echo "OK. File created." || echo "NOT OK. File creation failed." + +# unmount filesystem +cd .. +umount /mnt/minfs + +popd > /dev/null 2>&1 + +# mount filesystem +mount -t minfs /dev/vdb /mnt/minfs + +# check whether b.txt is still there +ls /mnt/minfs | grep b.txt && echo "OK. File b.txt exists " || echo "NOT OK. File b.txt does not exist." 
+ +# unmount filesystem +umount /mnt/minfs + +# unload module +rmmod minfs diff --git a/tools/labs/templates/filesystems/minfs/user/test-minfs.sh b/tools/labs/templates/filesystems/minfs/user/test-minfs.sh new file mode 100755 index 00000000000000..ea704e07600c2e --- /dev/null +++ b/tools/labs/templates/filesystems/minfs/user/test-minfs.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +set -ex + +#load module +insmod ../kernel/minfs.ko + +#create mount point +mkdir -p /mnt/minfs + +#format partition +./mkfs.minfs /dev/vdb + +#mount filesystem +mount -t minfs /dev/vdb /mnt/minfs + +#show registered filesystems +cat /proc/filesystems | grep minfs + +#show mounted filesystems +cat /proc/mounts | grep minfs + +#show filesystem statistics +stat -f /mnt/minfs + +#list all filesystem files +cd /mnt/minfs +ls -la + +#unmount filesystem +cd .. +umount /mnt/minfs + +#unload module +rmmod minfs diff --git a/tools/labs/templates/filesystems/myfs/Kbuild b/tools/labs/templates/filesystems/myfs/Kbuild new file mode 100644 index 00000000000000..1b3fa9316852ee --- /dev/null +++ b/tools/labs/templates/filesystems/myfs/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g -Wno-unused + +obj-m = myfs.o diff --git a/tools/labs/templates/filesystems/myfs/myfs.c b/tools/labs/templates/filesystems/myfs/myfs.c new file mode 100644 index 00000000000000..9046b4eaf4eff3 --- /dev/null +++ b/tools/labs/templates/filesystems/myfs/myfs.c @@ -0,0 +1,229 @@ +/* + * SO2 Lab - Filesystem drivers + * Exercise #1 (no-dev filesystem) + */ + +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple no-dev filesystem"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +#define MYFS_BLOCKSIZE 4096 +#define MYFS_BLOCKSIZE_BITS 12 +#define MYFS_MAGIC 0xbeefcafe +#define LOG_LEVEL KERN_ALERT + +/* declarations of functions that are part of operation structures */ + +static int myfs_mknod(struct inode *dir, + struct dentry *dentry, umode_t mode, dev_t dev); +static int myfs_create(struct inode *dir, struct 
dentry *dentry, + umode_t mode, bool excl); +static int myfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); + +/* TODO 2/4: define super_operations structure */ +static const struct super_operations myfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_drop_inode, +}; + +static const struct inode_operations myfs_dir_inode_operations = { + /* TODO 5/8: Fill dir inode operations structure. */ + .create = myfs_create, + .lookup = simple_lookup, + .link = simple_link, + .unlink = simple_unlink, + .mkdir = myfs_mkdir, + .rmdir = simple_rmdir, + .mknod = myfs_mknod, + .rename = simple_rename, +}; + +static const struct file_operations myfs_file_operations = { + /* TODO 6/4: Fill file operations structure. */ + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .mmap = generic_file_mmap, + .llseek = generic_file_llseek, +}; + +static const struct inode_operations myfs_file_inode_operations = { + /* TODO 6/1: Fill file inode operations structure. */ + .getattr = simple_getattr, +}; + +static const struct address_space_operations myfs_aops = { + /* TODO 6/3: Fill address space operations structure. */ + .readpage = simple_readpage, + .write_begin = simple_write_begin, + .write_end = simple_write_end, +}; + +struct inode *myfs_get_inode(struct super_block *sb, const struct inode *dir, + int mode) +{ + struct inode *inode = new_inode(sb); + + if (!inode) + return NULL; + + /* TODO 3/3: fill inode structure + * - mode + * - uid + * - gid + * - atime,ctime,mtime + * - ino + */ + inode_init_owner(inode, dir, mode); + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_ino = 1; + + /* TODO 5/1: Init i_ino using get_next_ino */ + inode->i_ino = get_next_ino(); + + /* TODO 6/1: Initialize address space operations. */ + inode->i_mapping->a_ops = &myfs_aops; + + if (S_ISDIR(mode)) { + /* TODO 3/2: set inode operations for dir inodes. 
*/ + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* TODO 5/1: use myfs_dir_inode_operations for inode + * operations (i_op). + */ + inode->i_op = &myfs_dir_inode_operations; + + /* TODO 3/1: directory inodes start off with i_nlink == 2 (for "." entry). + * Directory link count should be incremented (use inc_nlink). + */ + inc_nlink(inode); + } + + /* TODO 6/4: Set file inode and file operations for regular files + * (use the S_ISREG macro). + */ + if (S_ISREG(mode)) { + inode->i_op = &myfs_file_inode_operations; + inode->i_fop = &myfs_file_operations; + } + + return inode; +} + +/* TODO 5/33: Implement myfs_mknod, myfs_create, myfs_mkdir. */ +static int myfs_mknod(struct inode *dir, + struct dentry *dentry, umode_t mode, dev_t dev) +{ + struct inode *inode = myfs_get_inode(dir->i_sb, dir, mode); + + if (inode == NULL) + return -ENOSPC; + + d_instantiate(dentry, inode); + dget(dentry); + dir->i_mtime = dir->i_ctime = current_time(inode); + + return 0; +} + +static int myfs_create(struct inode *dir, struct dentry *dentry, + umode_t mode, bool excl) +{ + return myfs_mknod(dir, dentry, mode | S_IFREG, 0); +} + +static int myfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int ret; + + ret = myfs_mknod(dir, dentry, mode | S_IFDIR, 0); + if (ret != 0) + return ret; + + inc_nlink(dir); + + return 0; +} + +static int myfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *root_inode; + struct dentry *root_dentry; + + /* TODO 2/5: fill super_block + * - blocksize, blocksize_bits + * - magic + * - super operations + * - maxbytes + */ + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = MYFS_BLOCKSIZE; + sb->s_blocksize_bits = MYFS_BLOCKSIZE_BITS; + sb->s_magic = MYFS_MAGIC; + sb->s_op = &myfs_ops; + + /* mode = directory & access rights (755) */ + root_inode = myfs_get_inode(sb, NULL, + S_IFDIR | S_IRWXU | S_IRGRP | + S_IXGRP | S_IROTH | S_IXOTH); + /* check the allocation before the log line below dereferences root_inode */ + if (!root_inode) + return -ENOMEM; + + printk(LOG_LEVEL "root inode
has %d link(s)\n", root_inode->i_nlink); + + root_dentry = d_make_root(root_inode); + if (!root_dentry) + goto out_no_root; + sb->s_root = root_dentry; + + return 0; + +out_no_root: + /* d_make_root() has already dropped root_inode on failure; no iput() here */ + return -ENOMEM; +} + +static struct dentry *myfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + /* TODO 1/1: call superblock mount function */ + return mount_nodev(fs_type, flags, data, myfs_fill_super); +} + +/* TODO 1/6: define file_system_type structure */ +static struct file_system_type myfs_fs_type = { + .owner = THIS_MODULE, + .name = "myfs", + .mount = myfs_mount, + .kill_sb = kill_litter_super, +}; + +static int __init myfs_init(void) +{ + int err; + + /* TODO 1/1: register */ + err = register_filesystem(&myfs_fs_type); + if (err) { + printk(LOG_LEVEL "register_filesystem failed\n"); + return err; + } + + return 0; +} + +static void __exit myfs_exit(void) +{ + /* TODO 1/1: unregister */ + unregister_filesystem(&myfs_fs_type); +} + +module_init(myfs_init); +module_exit(myfs_exit); diff --git a/tools/labs/templates/filesystems/myfs/test-myfs-1.sh b/tools/labs/templates/filesystems/myfs/test-myfs-1.sh new file mode 100755 index 00000000000000..26dffe0eb4bfc2 --- /dev/null +++ b/tools/labs/templates/filesystems/myfs/test-myfs-1.sh @@ -0,0 +1,53 @@ +#!/bin/sh + +set -x + +# load module +insmod myfs.ko + +# mount filesystem +mkdir -p /mnt/myfs +mount -t myfs none /mnt/myfs +ls -laid /mnt/myfs + +cd /mnt/myfs + +# create directory +mkdir mydir +ls -la + +# create subdirectory +cd mydir +mkdir mysubdir +ls -lai + +# rename subdirectory +mv mysubdir myrenamedsubdir +ls -lai + +# delete renamed subdirectory +rmdir myrenamedsubdir +ls -la + +# create file +touch myfile +ls -lai + +# rename file +mv myfile myrenamedfile +ls -lai + +# delete renamed file +rm myrenamedfile + +# delete directory +cd .. +rmdir mydir +ls -la + +# unmount filesystem +cd ..
+umount /mnt/myfs + +# unload module +rmmod myfs diff --git a/tools/labs/templates/filesystems/myfs/test-myfs-2.sh b/tools/labs/templates/filesystems/myfs/test-myfs-2.sh new file mode 100755 index 00000000000000..17e92812599aba --- /dev/null +++ b/tools/labs/templates/filesystems/myfs/test-myfs-2.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +set -ex + +# load module +insmod myfs.ko + +# mount filesystem +mkdir -p /mnt/myfs +mount -t myfs none /mnt/myfs +ls -laid /mnt/myfs + +cd /mnt/myfs + +# create file +touch myfile +ls -lai + +# rename file +mv myfile myrenamedfile +ls -lai + +# create link to file +ln myrenamedfile mylink +ls -lai + +# read/write file +echo "message" > myrenamedfile +cat myrenamedfile + +# remove link to file +rm mylink +ls -la + +# delete file +rm -f myrenamedfile +ls -la + +# unmount filesystem +cd .. +umount /mnt/myfs + +# unload module +rmmod myfs diff --git a/tools/labs/templates/filesystems/myfs/test-myfs.sh b/tools/labs/templates/filesystems/myfs/test-myfs.sh new file mode 100755 index 00000000000000..c7bb43b5fb26d5 --- /dev/null +++ b/tools/labs/templates/filesystems/myfs/test-myfs.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +set -ex + +#load module +insmod myfs.ko + +#mount filesystem +mkdir -p /mnt/myfs +mount -t myfs none /mnt/myfs + +#show registered filesystems +cat /proc/filesystems | grep myfs + +#show mounted filesystems +cat /proc/mounts | grep myfs + +#show filesystem statistics +stat -f /mnt/myfs + +#list all filesystem files +cd /mnt/myfs +ls -la + +#unmount filesystem +cd .. 
def process_file(p, pattern, end_string=None):
    """Copy file `p` into the output tree, stripping code marked by TODO comments.

    The result is written to os.path.join(args.output, p).

    `pattern` is a regular expression with four groups:
      1. everything up to and including the word TODO,
      2. the optional task number,
      3. the optional number of lines that follow the marker and belong to it,
      4. the rest of the marker, starting at the colon.

    Every line matching `pattern` is rewritten as groups 1 + 2 + 4, i.e. the
    line count is dropped from the comment. When the task number (1 if
    absent) is >= args.todo, the following N lines (N = group 3, or 1 if
    absent) are left out of the output; a marker found among those N lines
    is still emitted in rewritten form, but it counts towards N.

    If `end_string` is given and a marker line does not contain it, the lines
    after the marker are copied verbatim up to and including the first line
    that contains `end_string`; only then does the skipping start.
    """
    # The context managers make sure both files are flushed and closed,
    # including when int() raises on a malformed marker half-way through.
    with open(p, "r") as src, open(os.path.join(args.output, p), "w") as dst:
        skip_lines = 0
        end_found = True
        for l in src:
            # Still inside a marker comment that spans several lines.
            if end_string and not end_found:
                dst.write(l)
                if end_string in l:
                    end_found = True
                continue

            # Inside a region that is being stripped.
            if skip_lines > 0:
                skip_lines -= 1
                m = re.search(pattern, l)
                if m:
                    l = "%s%s%s\n" % (m.group(1), m.group(2), m.group(4))
                    dst.write(l)
                continue

            m = re.search(pattern, l)
            if m:
                todo = 1
                if m.group(2):
                    todo = int(m.group(2))
                if todo >= args.todo:
                    if m.group(3):
                        skip_lines = int(m.group(3))
                    else:
                        skip_lines = 1

                if end_string and end_string not in l:
                    end_found = False

                l = "%s%s%s\n" % (m.group(1), m.group(2), m.group(4))
            dst.write(l)
os.path.join(args.output, p)) + else: + process_file(p, pattern, end_string) diff --git a/tools/labs/templates/interrupts/Kbuild b/tools/labs/templates/interrupts/Kbuild new file mode 100644 index 00000000000000..20b33f5d968dee --- /dev/null +++ b/tools/labs/templates/interrupts/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = kbd.o diff --git a/tools/labs/templates/interrupts/kbd.c b/tools/labs/templates/interrupts/kbd.c new file mode 100644 index 00000000000000..684b6fe1bfc6af --- /dev/null +++ b/tools/labs/templates/interrupts/kbd.c @@ -0,0 +1,278 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("KBD"); +MODULE_AUTHOR("Kernel Hacker"); +MODULE_LICENSE("GPL"); + +#define MODULE_NAME "kbd" + +#define KBD_MAJOR 42 +#define KBD_MINOR 0 +#define KBD_NR_MINORS 1 + +#define I8042_KBD_IRQ 1 +#define I8042_STATUS_REG 0x64 +#define I8042_DATA_REG 0x60 + +#define BUFFER_SIZE 1024 +#define SCANCODE_RELEASED_MASK 0x80 + +struct kbd { + struct cdev cdev; + /* TODO 3: add spinlock */ + spinlock_t lock; + char buf[BUFFER_SIZE]; + size_t put_idx, get_idx, count; +} devs[1]; + +/* + * Checks if scancode corresponds to key press or release. + */ +static int is_key_press(unsigned int scancode) +{ + return !(scancode & SCANCODE_RELEASED_MASK); +} + +/* + * Return the character of the given scancode. + * Only works for alphanumeric/space/enter; returns '?' for other + * characters. 
+ */ +static int get_ascii(unsigned int scancode) +{ + static char *row1 = "1234567890"; + static char *row2 = "qwertyuiop"; + static char *row3 = "asdfghjkl"; + static char *row4 = "zxcvbnm"; + + scancode &= ~SCANCODE_RELEASED_MASK; + if (scancode >= 0x02 && scancode <= 0x0b) + return *(row1 + scancode - 0x02); + if (scancode >= 0x10 && scancode <= 0x19) + return *(row2 + scancode - 0x10); + if (scancode >= 0x1e && scancode <= 0x26) + return *(row3 + scancode - 0x1e); + if (scancode >= 0x2c && scancode <= 0x32) + return *(row4 + scancode - 0x2c); + if (scancode == 0x39) + return ' '; + if (scancode == 0x1c) + return '\n'; + return '?'; +} + +static void put_char(struct kbd *data, char c) +{ + if (data->count >= BUFFER_SIZE) + return; + + data->buf[data->put_idx] = c; + data->put_idx = (data->put_idx + 1) % BUFFER_SIZE; + data->count++; +} + +static bool get_char(char *c, struct kbd *data) +{ + /* TODO 4/6: get char from buffer; update count and get_idx */ + if (data->count > 0) { + *c = data->buf[data->get_idx]; + data->get_idx = (data->get_idx + 1) % BUFFER_SIZE; + data->count--; + return true; + } + return false; +} + +static void reset_buffer(struct kbd *data) +{ + /* TODO 5/3: reset count, put_idx, get_idx */ + data->count = 0; + data->put_idx = 0; + data->get_idx = 0; +} + +/* + * Return the value of the DATA register. + */ +static inline u8 i8042_read_data(void) +{ + u8 val; + /* TODO 3: Read DATA register (8 bits). 
/*
 * Keyboard interrupt handler.
 *
 * Reads one scancode through i8042_read_data(), logs it together with
 * its decoded character, and on a key press appends that character to
 * the buffer of the struct kbd passed as @dev_id while holding the
 * device spinlock. Key releases are logged but not stored.
 *
 * Always returns IRQ_NONE.
 * NOTE(review): presumably this is so the interrupt stays "unhandled"
 * from this handler's point of view and whoever else shares the line
 * still processes it - confirm.
 *
 * The TODO marker below counts the 27 lines of the function; keep the
 * layout unchanged or update the count.
 */
/* TODO 2/27: implement interrupt handler */
irqreturn_t kbd_interrupt_handle(int irq_no, void *dev_id)
{

	unsigned int scancode = 0;
	int pressed, ch;

	/* TODO 3: read the scancode */
	scancode = i8042_read_data();
	/* TODO 3/2: interpret the scancode */
	pressed = is_key_press(scancode);
	ch = get_ascii(scancode);

	/* TODO 3/2: display information about the keystrokes */
	pr_info("IRQ %d: scancode=0x%x (%u) pressed=%d ch=%c\n",
		irq_no, scancode, scancode, pressed, ch);

	/* TODO 3/7: store ASCII key to buffer */
	if (pressed) {
		struct kbd *data = (struct kbd *)dev_id;

		spin_lock(&data->lock);
		put_char(data, ch);
		spin_unlock(&data->lock);
	}

	return IRQ_NONE;
}
return read; +} + +static const struct file_operations kbd_fops = { + .owner = THIS_MODULE, + .open = kbd_open, + .release = kbd_release, + .read = kbd_read, + /* TODO 5: add write operation */ + .write = kbd_write, +}; + +static int kbd_init(void) +{ + int err; + + err = register_chrdev_region(MKDEV(KBD_MAJOR, KBD_MINOR), + KBD_NR_MINORS, MODULE_NAME); + if (err != 0) { + pr_err("register_region failed: %d\n", err); + goto out; + } + + /* TODO 1/8: request the keyboard I/O ports */ + if (request_region(I8042_DATA_REG+1, 1, MODULE_NAME) == NULL) { + err = -EBUSY; + goto out_unregister; + } + if (request_region(I8042_STATUS_REG+1, 1, MODULE_NAME) == NULL) { + err = -EBUSY; + goto out_release_region; + } + + /* TODO 3: initialize spinlock */ + spin_lock_init(&devs[0].lock); + + /* TODO 2/7: Register IRQ handler for keyboard IRQ (IRQ 1). */ + err = request_irq(I8042_KBD_IRQ, + kbd_interrupt_handle, + IRQF_SHARED, MODULE_NAME, &devs[0]); + if (err != 0) { + pr_err("request_irq failed: %d\n", err); + goto out_release_regions; + } + + cdev_init(&devs[0].cdev, &kbd_fops); + cdev_add(&devs[0].cdev, MKDEV(KBD_MAJOR, KBD_MINOR), 1); + + pr_notice("Driver %s loaded\n", MODULE_NAME); + return 0; + + /*TODO 2/4: release regions in case of error */ +out_release_regions: + release_region(I8042_STATUS_REG+1, 1); +out_release_region: + release_region(I8042_DATA_REG+1, 1); + +out_unregister: + unregister_chrdev_region(MKDEV(KBD_MAJOR, KBD_MINOR), + KBD_NR_MINORS); +out: + return err; +} + +static void kbd_exit(void) +{ + cdev_del(&devs[0].cdev); + + /* TODO 2: Free IRQ. 
*/ + free_irq(I8042_KBD_IRQ, &devs[0]); + + /* TODO 1/2: release keyboard I/O ports */ + release_region(I8042_STATUS_REG+1, 1); + release_region(I8042_DATA_REG+1, 1); + + + unregister_chrdev_region(MKDEV(KBD_MAJOR, KBD_MINOR), + KBD_NR_MINORS); + pr_notice("Driver %s unloaded\n", MODULE_NAME); +} + +module_init(kbd_init); +module_exit(kbd_exit); diff --git a/tools/labs/templates/kernel_api/1-mem/Kbuild b/tools/labs/templates/kernel_api/1-mem/Kbuild new file mode 100644 index 00000000000000..85f6de99faec52 --- /dev/null +++ b/tools/labs/templates/kernel_api/1-mem/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = mem.o diff --git a/tools/labs/templates/kernel_api/1-mem/mem.c b/tools/labs/templates/kernel_api/1-mem/mem.c new file mode 100644 index 00000000000000..a18029a88d2216 --- /dev/null +++ b/tools/labs/templates/kernel_api/1-mem/mem.c @@ -0,0 +1,46 @@ +/* + * Kernel API lab + * + * mem.c - Memory allocation in Linux + */ + +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Print memory"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +static char *mem; + +static int mem_init(void) +{ + size_t i; + + mem = kmalloc(4096 * sizeof(*mem), GFP_KERNEL); + if (mem == NULL) + goto err_mem; + + pr_info("chars: "); + for (i = 0; i < 4096; i++) { + if (isalpha(mem[i])) + printk("%c ", mem[i]); + } + pr_info("\n"); + + return 0; + +err_mem: + return -1; +} + +static void mem_exit(void) +{ + kfree(mem); +} + +module_init(mem_init); +module_exit(mem_exit); diff --git a/tools/labs/templates/kernel_api/2-sched-spin/Kbuild b/tools/labs/templates/kernel_api/2-sched-spin/Kbuild new file mode 100644 index 00000000000000..440138296e63a1 --- /dev/null +++ b/tools/labs/templates/kernel_api/2-sched-spin/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = sched-spin.o diff --git a/tools/labs/templates/kernel_api/2-sched-spin/sched-spin.c 
/*
 * Module load hook: take a spinlock that lives on the stack, try to
 * sleep for 5 seconds while holding it, then release it.
 *
 * NOTE(review): sleeping with a spinlock held is normally a bug. The
 * file header ("Sleeping in atomic context") suggests that triggering it
 * is the purpose of this module - confirm before "fixing" it.
 *
 * Always returns 0.
 */
static int sched_spin_init(void)
{
	spinlock_t lock;

	spin_lock_init(&lock);

	/* TODO 0/1: Use spin_lock to acquire the lock */
	spin_lock(&lock);

	set_current_state(TASK_INTERRUPTIBLE);
	/* Try to sleep for 5 seconds. */
	schedule_timeout(5 * HZ);

	/* TODO 0/1: Use spin_unlock to release the lock */
	spin_unlock(&lock);

	return 0;
}
NULL) + return NULL; + ti->pid = pid; + ti->timestamp = jiffies; + + return ti; +} + +static int memory_init(void) +{ + /* TODO 2/1: call task_info_alloc for current pid */ + ti1 = task_info_alloc(current->pid); + + /* TODO 2/1: call task_info_alloc for parent PID */ + ti2 = task_info_alloc(current->parent->pid); + + /* TODO 2/1: call task_info alloc for next process PID */ + ti3 = task_info_alloc(next_task(current)->pid); + + /* TODO 2/1: call task_info_alloc for next process of the next process */ + ti4 = task_info_alloc(next_task(next_task(current))->pid); + + return 0; +} + +static void memory_exit(void) +{ + + /* TODO 3/4: print ti* field values */ + printk("pid: %d, timestamp: %lu\n", ti1->pid, ti1->timestamp); + printk("pid: %d, timestamp: %lu\n", ti2->pid, ti2->timestamp); + printk("pid: %d, timestamp: %lu\n", ti3->pid, ti3->timestamp); + printk("pid: %d, timestamp: %lu\n", ti4->pid, ti4->timestamp); + + /* TODO 4/4: free ti* structures */ + kfree(ti1); + kfree(ti2); + kfree(ti3); + kfree(ti4); +} + +module_init(memory_init); +module_exit(memory_exit); diff --git a/tools/labs/templates/kernel_api/4-list/Kbuild b/tools/labs/templates/kernel_api/4-list/Kbuild new file mode 100644 index 00000000000000..7187139dbdb7af --- /dev/null +++ b/tools/labs/templates/kernel_api/4-list/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = list.o diff --git a/tools/labs/templates/kernel_api/4-list/list.c b/tools/labs/templates/kernel_api/4-list/list.c new file mode 100644 index 00000000000000..4745b75c59452a --- /dev/null +++ b/tools/labs/templates/kernel_api/4-list/list.c @@ -0,0 +1,100 @@ +/* + * Kernel API lab + * + * list.c: Working with lists + * + */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Use list to process task info"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +struct task_info { + pid_t pid; + unsigned long timestamp; + struct list_head list; +}; + +static 
struct list_head head; + +static struct task_info *task_info_alloc(int pid) +{ + struct task_info *ti; + + ti = kmalloc(sizeof(*ti), GFP_KERNEL); + if (ti == NULL) + return NULL; + ti->pid = pid; + ti->timestamp = jiffies; + + return ti; +} + +static void task_info_add_to_list(int pid) +{ + struct task_info *ti; + + /* TODO 1/2: Allocate task_info and add it to list */ + ti = task_info_alloc(pid); + list_add(&ti->list, &head); +} + +static void task_info_add_for_current(void) +{ + /* Add current, parent, next and next of next to the list */ + task_info_add_to_list(current->pid); + task_info_add_to_list(current->parent->pid); + task_info_add_to_list(next_task(current)->pid); + task_info_add_to_list(next_task(next_task(current))->pid); +} + +static void task_info_print_list(const char *msg) +{ + struct list_head *p; + struct task_info *ti; + + pr_info("%s: [ ", msg); + list_for_each(p, &head) { + ti = list_entry(p, struct task_info, list); + pr_info("(%d, %lu) ", ti->pid, ti->timestamp); + } + pr_info("]\n"); +} + +static void task_info_purge_list(void) +{ + struct list_head *p, *q; + struct task_info *ti; + + /* TODO 2/5: Iterate over the list and delete all elements */ + list_for_each_safe(p, q, &head) { + ti = list_entry(p, struct task_info, list); + list_del(p); + kfree(ti); + } +} + +static int list_init(void) +{ + INIT_LIST_HEAD(&head); + + task_info_add_for_current(); + + return 0; +} + +static void list_exit(void) +{ + task_info_print_list("before exiting"); + task_info_purge_list(); +} + +module_init(list_init); +module_exit(list_exit); diff --git a/tools/labs/templates/kernel_api/5-list-full/Kbuild b/tools/labs/templates/kernel_api/5-list-full/Kbuild new file mode 100644 index 00000000000000..45358ad9ca1503 --- /dev/null +++ b/tools/labs/templates/kernel_api/5-list-full/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = list-full.o diff --git a/tools/labs/templates/kernel_api/5-list-full/list-full.c 
b/tools/labs/templates/kernel_api/5-list-full/list-full.c new file mode 100644 index 00000000000000..1184fcb1e91478 --- /dev/null +++ b/tools/labs/templates/kernel_api/5-list-full/list-full.c @@ -0,0 +1,145 @@ +/* + * Kernel API lab + * + * list-full.c: Working with lists (advanced) + */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Full list processing"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +struct task_info { + pid_t pid; + unsigned long timestamp; + atomic_t count; + struct list_head list; +}; + +static struct list_head head; + +static struct task_info *task_info_alloc(int pid) +{ + struct task_info *ti; + + ti = kmalloc(sizeof(*ti), GFP_KERNEL); + if (ti == NULL) + return NULL; + ti->pid = pid; + ti->timestamp = jiffies; + atomic_set(&ti->count, 0); + + return ti; +} + +static struct task_info *task_info_find_pid(int pid) +{ + struct list_head *p; + struct task_info *ti; + + /* TODO 1/5: Look for pid and return task_info or NULL if not found */ + list_for_each(p, &head) { + ti = list_entry(p, struct task_info, list); + if (ti->pid == pid) + return ti; + } + + return NULL; +} + +static void task_info_add_to_list(int pid) +{ + struct task_info *ti; + + ti = task_info_find_pid(pid); + if (ti != NULL) { + ti->timestamp = jiffies; + atomic_inc(&ti->count); + return; + } + + ti = task_info_alloc(pid); + list_add(&ti->list, &head); +} + +static void task_info_add_for_current(void) +{ + task_info_add_to_list(current->pid); + task_info_add_to_list(current->parent->pid); + task_info_add_to_list(next_task(current)->pid); + task_info_add_to_list(next_task(next_task(current))->pid); +} + +static void task_info_print_list(const char *msg) +{ + struct list_head *p; + struct task_info *ti; + + pr_info("%s: [ ", msg); + list_for_each(p, &head) { + ti = list_entry(p, struct task_info, list); + pr_info("(%d, %lu) ", ti->pid, ti->timestamp); + } + pr_info("]\n"); +} + +static void task_info_remove_expired(void) +{ + struct 
list_head *p, *q; + struct task_info *ti; + + list_for_each_safe(p, q, &head) { + ti = list_entry(p, struct task_info, list); + if (jiffies - ti->timestamp > 3 * HZ && atomic_read(&ti->count) < 5) { + list_del(p); + kfree(ti); + } + } +} + +static void task_info_purge_list(void) +{ + struct list_head *p, *q; + struct task_info *ti; + + list_for_each_safe(p, q, &head) { + ti = list_entry(p, struct task_info, list); + list_del(p); + kfree(ti); + } +} + +static int list_full_init(void) +{ + INIT_LIST_HEAD(&head); + + task_info_add_for_current(); + task_info_print_list("after first add"); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(5 * HZ); + + return 0; +} + +static void list_full_exit(void) +{ + struct task_info *ti; + + /* TODO 2/2: Ensure that at least one task is not deleted */ + ti = list_entry(head.prev, struct task_info, list); + atomic_set(&ti->count, 10); + + task_info_remove_expired(); + task_info_print_list("after removing expired"); + task_info_purge_list(); +} + +module_init(list_full_init); +module_exit(list_full_exit); diff --git a/tools/labs/templates/kernel_api/6-list-sync/Kbuild b/tools/labs/templates/kernel_api/6-list-sync/Kbuild new file mode 100644 index 00000000000000..8105af70665ff9 --- /dev/null +++ b/tools/labs/templates/kernel_api/6-list-sync/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = list-sync.o diff --git a/tools/labs/templates/kernel_api/6-list-sync/list-sync.c b/tools/labs/templates/kernel_api/6-list-sync/list-sync.c new file mode 100644 index 00000000000000..ae837ed4795228 --- /dev/null +++ b/tools/labs/templates/kernel_api/6-list-sync/list-sync.c @@ -0,0 +1,176 @@ +/* + * Linux API lab + * + * list-sync.c - Synchronize access to a list + */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Full list processing with synchronization"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +struct task_info { + pid_t pid; + 
unsigned long timestamp; + atomic_t count; + struct list_head list; +}; + +static struct list_head head; + +/* TODO 1: you can use either a spinlock or rwlock, define it here */ +DEFINE_RWLOCK(lock); + +static struct task_info *task_info_alloc(int pid) +{ + struct task_info *ti; + + ti = kmalloc(sizeof(*ti), GFP_KERNEL); + if (ti == NULL) + return NULL; + ti->pid = pid; + ti->timestamp = jiffies; + atomic_set(&ti->count, 0); + + return ti; +} + +static struct task_info *task_info_find_pid(int pid) +{ + struct list_head *p; + struct task_info *ti; + + list_for_each(p, &head) { + ti = list_entry(p, struct task_info, list); + if (ti->pid == pid) { + return ti; + } + } + + return NULL; +} + +static void task_info_add_to_list(int pid) +{ + struct task_info *ti; + + /* TODO 1: Protect list, is this read or write access? */ + write_lock(&lock); + ti = task_info_find_pid(pid); + if (ti != NULL) { + ti->timestamp = jiffies; + atomic_inc(&ti->count); + /* TODO: Guess why this comment was added here */ + write_unlock(&lock); + return; + } + /* TODO 1: critical section ends here */ + write_unlock(&lock); + + ti = task_info_alloc(pid); + /* TODO 1: protect list access, is this read or write access? */ + write_lock(&lock); + list_add(&ti->list, &head); + /* TODO 1: critical section ends here */ + write_unlock(&lock); +} + +void task_info_add_for_current(void) +{ + task_info_add_to_list(current->pid); + task_info_add_to_list(current->parent->pid); + task_info_add_to_list(next_task(current)->pid); + task_info_add_to_list(next_task(next_task(current))->pid); +} +/* TODO 2: Export the kernel symbol */ +EXPORT_SYMBOL(task_info_add_for_current); + +void task_info_print_list(const char *msg) +{ + struct list_head *p; + struct task_info *ti; + + pr_info("%s: [ ", msg); + + /* TODO 1: Protect list, is this read or write access? 
*/ + read_lock(&lock); + list_for_each(p, &head) { + ti = list_entry(p, struct task_info, list); + pr_info("(%d, %lu) ", ti->pid, ti->timestamp); + } + /* TODO 1: Critical section ends here */ + read_unlock(&lock); + pr_info("]\n"); +} +/* TODO 2: Export the kernel symbol */ +EXPORT_SYMBOL(task_info_print_list); + +void task_info_remove_expired(void) +{ + struct list_head *p, *q; + struct task_info *ti; + + /* TODO 1: Protect list, is this read or write access? */ + write_lock(&lock); + list_for_each_safe(p, q, &head) { + ti = list_entry(p, struct task_info, list); + if (jiffies - ti->timestamp > 3 * HZ && atomic_read(&ti->count) < 5) { + list_del(p); + kfree(ti); + } + } + /* TODO 1: Critical section ends here */ + write_unlock(&lock); +} +/* TODO 2: Export the kernel symbol */ +EXPORT_SYMBOL(task_info_remove_expired); + +static void task_info_purge_list(void) +{ + struct list_head *p, *q; + struct task_info *ti; + + /* TODO 1: Protect list, is this read or write access? */ + write_lock(&lock); + list_for_each_safe(p, q, &head) { + ti = list_entry(p, struct task_info, list); + list_del(p); + kfree(ti); + } + /* TODO 1: Critical sections ends here */ + write_unlock(&lock); +} + +static int list_sync_init(void) +{ + INIT_LIST_HEAD(&head); + + task_info_add_for_current(); + task_info_print_list("after first add"); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(5 * HZ); + + return 0; +} + +static void list_sync_exit(void) +{ + struct task_info *ti; + + ti = list_entry(head.prev, struct task_info, list); + atomic_set(&ti->count, 10); + + task_info_remove_expired(); + task_info_print_list("after removing expired"); + task_info_purge_list(); +} + +module_init(list_sync_init); +module_exit(list_sync_exit); diff --git a/tools/labs/templates/kernel_api/7-list-test/Kbuild b/tools/labs/templates/kernel_api/7-list-test/Kbuild new file mode 100644 index 00000000000000..324750ee764344 --- /dev/null +++ b/tools/labs/templates/kernel_api/7-list-test/Kbuild @@ -0,0 
/*
 * Module load hook. As written it does nothing and returns 0: the calls
 * to task_info_add_for_current() and task_info_print_list() stay
 * commented out until the 6-list-sync module exports those symbols.
 */
static int list_test_init(void)
{
	/* TODO 1/0: Uncomment after exporting the symbols in 6-list-sync. */
	// task_info_add_for_current();
	// task_info_print_list("after new addition");

	return 0;
}
/*
 * Module load hook: log "Hello!" at debug level and report success.
 *
 * The Kbuild file of this module passes -DDEBUG, which is what makes
 * pr_debug() produce output here.
 *
 * Always returns 0.
 */
static int my_hello_init(void)
{
	pr_debug("Hello!\n");
	return 0;
}
/* Return the sum of @a and @b. */
int add(int a, int b)
{
	int sum = a;

	sum += b;
	return sum;
}
to generate debug information +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = oops_mod.o diff --git a/tools/labs/templates/kernel_modules/5-oops-mod/oops_mod.c b/tools/labs/templates/kernel_modules/5-oops-mod/oops_mod.c new file mode 100644 index 00000000000000..9fd1448572a94a --- /dev/null +++ b/tools/labs/templates/kernel_modules/5-oops-mod/oops_mod.c @@ -0,0 +1,27 @@ +#include +#include +#include +#include + +MODULE_DESCRIPTION("Oops generating module"); +MODULE_AUTHOR("So2rul Esforever"); +MODULE_LICENSE("GPL"); + +static int my_oops_init(void) +{ + char *p = 0; + + pr_info("before init\n"); + *p = 'a'; + pr_info("after init\n"); + + return 0; +} + +static void my_oops_exit(void) +{ + pr_info("module goes all out\n"); +} + +module_init(my_oops_init); +module_exit(my_oops_exit); diff --git a/tools/labs/templates/kernel_modules/6-cmd-mod/Kbuild b/tools/labs/templates/kernel_modules/6-cmd-mod/Kbuild new file mode 100644 index 00000000000000..2c5fe9cfd4de33 --- /dev/null +++ b/tools/labs/templates/kernel_modules/6-cmd-mod/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = cmd_mod.o diff --git a/tools/labs/templates/kernel_modules/6-cmd-mod/cmd_mod.c b/tools/labs/templates/kernel_modules/6-cmd-mod/cmd_mod.c new file mode 100644 index 00000000000000..3bd758897f3600 --- /dev/null +++ b/tools/labs/templates/kernel_modules/6-cmd-mod/cmd_mod.c @@ -0,0 +1,26 @@ +#include +#include +#include + +MODULE_DESCRIPTION("Command-line args module"); +MODULE_AUTHOR("Kernel Hacker"); +MODULE_LICENSE("GPL"); + +static char *str = "the worm"; + +module_param(str, charp, 0000); +MODULE_PARM_DESC(str, "A simple string"); + +static int __init cmd_init(void) +{ + pr_info("Early bird gets %s\n", str); + return 0; +} + +static void __exit cmd_exit(void) +{ + pr_info("Exit, stage left\n"); +} + +module_init(cmd_init); +module_exit(cmd_exit); diff --git 
a/tools/labs/templates/kernel_modules/7-list-proc/Kbuild b/tools/labs/templates/kernel_modules/7-list-proc/Kbuild new file mode 100644 index 00000000000000..45eb7676b7ec51 --- /dev/null +++ b/tools/labs/templates/kernel_modules/7-list-proc/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = list_proc.o diff --git a/tools/labs/templates/kernel_modules/7-list-proc/list_proc.c b/tools/labs/templates/kernel_modules/7-list-proc/list_proc.c new file mode 100644 index 00000000000000..96a659bd277f03 --- /dev/null +++ b/tools/labs/templates/kernel_modules/7-list-proc/list_proc.c @@ -0,0 +1,35 @@ +#include +#include +#include +/* TODO: add missing headers */ +#include + +MODULE_DESCRIPTION("List current processes"); +MODULE_AUTHOR("Kernel Hacker"); +MODULE_LICENSE("GPL"); + +static int my_proc_init(void) +{ + struct task_struct *p; + + /* TODO/2: print current process pid and its name */ + pr_info("Current process: pid = %d; comm = %s\n", + current->pid, current->comm); + + /* TODO/3: print the pid and name of all processes */ + pr_info("\nProcess list:\n\n"); + for_each_process(p) + pr_info("pid = %d; comm = %s\n", p->pid, p->comm); + + return 0; +} + +static void my_proc_exit(void) +{ + /* TODO/2: print current process pid and name */ + pr_info("Current process: pid = %d; comm = %s\n", + current->pid, current->comm); +} + +module_init(my_proc_init); +module_exit(my_proc_exit); diff --git a/tools/labs/templates/kernel_modules/8-kdb/Kbuild b/tools/labs/templates/kernel_modules/8-kdb/Kbuild new file mode 100644 index 00000000000000..4453b28ab39c4d --- /dev/null +++ b/tools/labs/templates/kernel_modules/8-kdb/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m = hello_kdb.o diff --git a/tools/labs/templates/kernel_modules/8-kdb/hello_kdb.c b/tools/labs/templates/kernel_modules/8-kdb/hello_kdb.c new file mode 100644 index 00000000000000..656a265d288927 --- /dev/null +++ 
b/tools/labs/templates/kernel_modules/8-kdb/hello_kdb.c
@@ -0,0 +1,150 @@
+#include
+#include
+#include
+
+int kdb_write_address;
+EXPORT_SYMBOL(kdb_write_address);
+
+noinline void dummy_func18(void)
+{
+	panic("Hello KDB has paniced!");
+}
+noinline void dummy_func17(void)
+{
+	dummy_func18();
+}
+noinline void dummy_func16(void)
+{
+	dummy_func17();
+}
+noinline void dummy_func15(void)
+{
+	dummy_func16();
+}
+noinline void dummy_func14(void)
+{
+	dummy_func15();
+}
+noinline void dummy_func13(void)
+{
+	dummy_func14();
+}
+noinline void dummy_func12(void)
+{
+	dummy_func13();
+}
+noinline void dummy_func11(void)
+{
+	dummy_func12();
+}
+noinline void dummy_func10(void)
+{
+	dummy_func11();
+}
+noinline void dummy_func9(void)
+{
+	dummy_func10();
+}
+noinline void dummy_func8(void)
+{
+	dummy_func9();
+}
+noinline void dummy_func7(void)
+{
+	dummy_func8();
+}
+noinline void dummy_func6(void)
+{
+	dummy_func7();
+}
+noinline void dummy_func5(void)
+{
+	dummy_func6();
+}
+noinline void dummy_func4(void)
+{
+	dummy_func5();
+}
+noinline void dummy_func3(void)
+{
+	dummy_func4();
+}
+noinline void dummy_func2(void)
+{
+	dummy_func3();
+}
+noinline void dummy_func1(void)
+{
+	dummy_func2();
+}
+
+static int hello_proc_show(struct seq_file *m, void *v) {
+	seq_printf(m, "Hello proc!\n");
+	return 0;
+}
+
+static int hello_proc_open(struct inode *inode, struct file *file) {
+	return single_open(file, hello_proc_show, NULL);
+}
+
+/*
+ * Write handler behind edit_proc_ops: every write bumps kdb_write_address
+ * and reports the whole buffer as consumed; the data itself is never read.
+ */
+static ssize_t edit_write(struct file *file, const char __user *buffer,
+		size_t count, loff_t *data)
+{
+	kdb_write_address += 1;
+	return count;
+}
+
+/*
+ * Write handler behind bug_proc_ops: any write walks the dummy_func1..18
+ * call chain, which ends in panic(), so the return is not expected to run.
+ */
+static ssize_t bug_write(struct file *file, const char __user *buffer,
+		size_t count, loff_t *data)
+{
+	dummy_func1();
+	return count;
+}
+
+static const struct proc_ops edit_proc_ops = {
+	.proc_open = hello_proc_open,
+	.proc_read = seq_read,
+	.proc_write = edit_write,
+	.proc_lseek = seq_lseek,
+	.proc_release = single_release,
+};
+
+static const struct proc_ops bug_proc_ops = {
+	.proc_open = hello_proc_open,
+	.proc_read = seq_read,
+	.proc_write = bug_write,
+	.proc_lseek = seq_lseek,
+	.proc_release = single_release,
+};
+
+static int __init hello_proc_init(void) {
+	struct proc_dir_entry *file;
+	file = proc_create("hello_kdb_bug", 0, NULL, &bug_proc_ops);
+	if (file == NULL) {
+		return -ENOMEM;
+	}
+
+	file = proc_create("hello_kdb_break", 0, NULL, &edit_proc_ops);
+	if (file == NULL) {
+		remove_proc_entry("hello_kdb_bug", NULL);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static void __exit hello_proc_exit(void) {
+	remove_proc_entry("hello_kdb_bug", NULL);
+	remove_proc_entry("hello_kdb_break", NULL);
+}
+
+MODULE_LICENSE("GPL");
+module_init(hello_proc_init);
+module_exit(hello_proc_exit);
diff --git a/tools/labs/templates/kernel_modules/9-dyndbg/Kbuild b/tools/labs/templates/kernel_modules/9-dyndbg/Kbuild
new file mode 100644
index 00000000000000..a0d5f3af5202f1
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/9-dyndbg/Kbuild
@@ -0,0 +1,3 @@
+EXTRA_CFLAGS = -Wall -g
+
+obj-m = dyndbg.o
diff --git a/tools/labs/templates/kernel_modules/9-dyndbg/dyndbg.c b/tools/labs/templates/kernel_modules/9-dyndbg/dyndbg.c
new file mode 100644
index 00000000000000..e9071e5a723535
--- /dev/null
+++ b/tools/labs/templates/kernel_modules/9-dyndbg/dyndbg.c
@@ -0,0 +1,32 @@
+#include
+#include
+#include
+
+MODULE_DESCRIPTION("Dyndbg kernel module");
+MODULE_AUTHOR("Dyndbg");
+MODULE_LICENSE("GPL");
+
+void my_debug_func(void)
+{
+	pr_debug("Important dyndbg debug message1\n");
+	pr_debug("Important dyndbg debug message2\n");
+	pr_debug("Verbose dyndbg debug message\n");
+}
+EXPORT_SYMBOL(my_debug_func);
+
+
+static int dyndbg_init(void)
+{
+	printk(KERN_INFO "Hi dyndbg!\n" );
+	my_debug_func();
+	return 0;
+}
+
+static void dyndbg_exit(void)
+{
+	printk(KERN_INFO "Bye dyndbg!\n" );
+	my_debug_func();
+}
+
+module_init(dyndbg_init);
+module_exit(dyndbg_exit);
diff --git a/tools/labs/templates/kernel_profiling/0-demo/Makefile
b/tools/labs/templates/kernel_profiling/0-demo/Makefile new file mode 100644 index 00000000000000..340583100b531a --- /dev/null +++ b/tools/labs/templates/kernel_profiling/0-demo/Makefile @@ -0,0 +1,11 @@ +CFLAGS=-Wall -m32 -g +LDFLAGS=-static -m32 + +all: io-app + +io-app: io-app.o + +.PHONY: clean + +clean: + -rm -f *~ *.o io-app lab008.data diff --git a/tools/labs/templates/kernel_profiling/0-demo/io-app.c b/tools/labs/templates/kernel_profiling/0-demo/io-app.c new file mode 100644 index 00000000000000..08331c29d37a18 --- /dev/null +++ b/tools/labs/templates/kernel_profiling/0-demo/io-app.c @@ -0,0 +1,76 @@ +/* + * lab008 - sync disk writes to a file, with (syscall) latency outliers. + * + * 21-May-2015 Brendan Gregg Created this. + */ + +#include +#include +#include +#include +#include +#include +#include + +const char *datafile = "lab008.data"; + +#define BUFSIZE (8 * 1024) +#define BIGSIZE (10 * 1024 * 1024) +#define FILESIZE (10 * 1024 * 1024) + +void +write_log(int fd) +{ + char *buf, *big; + long long i; + int ret, j; + + buf = malloc(BUFSIZE); + big = malloc(BIGSIZE); + if (buf == NULL || big == NULL) { + printf("ERROR: malloc buffers.\n"); + exit(1); + } + bzero(buf, BUFSIZE); + bzero(big, BIGSIZE); + + for (;;) { + for (i = 0, j = 0; i < FILESIZE;) { + if ((j++ % 100) == 0) { + ret = write(fd, big, BIGSIZE); + i += BIGSIZE; + } else { + ret = write(fd, buf, BUFSIZE); + i += BUFSIZE; + } + + if (ret < 0) { + printf("ERROR: write error.\n"); + exit(2); + } + } + + if (lseek(fd, 0, SEEK_SET) < 0) { + printf("ERROR: seek() failed.\n"); + exit(3); + } + } + + free(buf); + free(big); +} + +int +main() +{ + int fd; + + if ((fd = open(datafile, O_CREAT | O_WRONLY | O_SYNC, 0644)) < 0) { + printf("ERROR: writing to %s\n", datafile); + exit(1); + } + + write_log(fd); + + return (0); +} diff --git a/tools/labs/templates/kernel_profiling/0-demo/run-io-app.sh b/tools/labs/templates/kernel_profiling/0-demo/run-io-app.sh new file mode 100755 index 
00000000000000..d9a4f758a6dbde
--- /dev/null
+++ b/tools/labs/templates/kernel_profiling/0-demo/run-io-app.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+dd if=/dev/urandom of=lab008.data bs=10K count=1K
+./io-app
diff --git a/tools/labs/templates/kernel_profiling/1-io/Kbuild b/tools/labs/templates/kernel_profiling/1-io/Kbuild
new file mode 100644
index 00000000000000..d780cbaf86e0fa
--- /dev/null
+++ b/tools/labs/templates/kernel_profiling/1-io/Kbuild
@@ -0,0 +1,3 @@
+ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable
+
+obj-m := io.o
diff --git a/tools/labs/templates/kernel_profiling/1-io/io.c b/tools/labs/templates/kernel_profiling/1-io/io.c
new file mode 100644
index 00000000000000..36c76e09d4e9cc
--- /dev/null
+++ b/tools/labs/templates/kernel_profiling/1-io/io.c
@@ -0,0 +1,60 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define MY_MAJOR 42
+#define MY_MINOR 0
+#define MODULE_NAME "deferred"
+
+#define TIMER_TYPE_NONE -1
+#define TIMER_TYPE_SET 0
+#define TIMER_TYPE_ALLOC 1
+#define TIMER_TYPE_MON 2
+
+MODULE_DESCRIPTION("Generate disruptive interrupts");
+MODULE_AUTHOR("SO2");
+MODULE_LICENSE("GPL");
+
+struct timer_list timer;
+
+/*
+ * Timer callback: busy-waits for HZ jiffies (about one second) inside the
+ * handler, then re-arms the timer to fire again HZ jiffies later.
+ */
+static void timer_handler(struct timer_list *tl)
+{
+	unsigned long deadline = jiffies + HZ;
+
+	/* time_before() stays correct when the jiffies counter wraps */
+	while (time_before(jiffies, deadline)) {
+		(void)0;
+	}
+	mod_timer(&timer, jiffies + HZ);
+}
+
+/* Module entry point: arm the timer to fire first after 5 * HZ jiffies. */
+static int deferred_init(void)
+{
+	pr_info("[deferred_init] Init module\n");
+
+	timer_setup(&timer, timer_handler, 0);
+	mod_timer(&timer, jiffies + 5 * HZ);
+
+	return 0;
+}
+
+/* Module exit point: delete the timer before the module text goes away. */
+static void deferred_exit(void)
+{
+	pr_info("[deferred_exit] Exit module\n" );
+
+	del_timer_sync(&timer);
+}
+
+module_init(deferred_init);
+module_exit(deferred_exit);
diff --git a/tools/labs/templates/kernel_profiling/2-scheduling/Makefile b/tools/labs/templates/kernel_profiling/2-scheduling/Makefile
new file mode 100644
index 00000000000000..3c5406eb48fb88
--- /dev/null
+++
b/tools/labs/templates/kernel_profiling/2-scheduling/Makefile
@@ -0,0 +1,9 @@
+CFLAGS = -Wall -m32 -pthread -g
+LDFLAGS = -static -m32 -pthread
+
+scheduling: scheduling.o
+
+.PHONY: clean
+
+clean:
+	-rm -f *~ *.o scheduling
diff --git a/tools/labs/templates/kernel_profiling/2-scheduling/scheduling.c b/tools/labs/templates/kernel_profiling/2-scheduling/scheduling.c
new file mode 100644
index 00000000000000..e3cf550e24bcba
--- /dev/null
+++ b/tools/labs/templates/kernel_profiling/2-scheduling/scheduling.c
@@ -0,0 +1,56 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Print the index of the worker that is running. */
+void helper(int i) {
+	printf("%d\n", i);
+}
+
+/* Thread body: arg carries the loop index by value, not a real pointer. */
+void * thread_start(void *arg) {
+
+	helper((int)(long) arg);
+	pthread_exit(NULL);
+}
+
+/*
+ * Spawn up to 300 workers that each print their index: POSIX threads when
+ * argv[1] parses to 0, forked child processes otherwise.
+ */
+int main(int argc, char *argv[]) {
+
+	int pid = 0;
+	pthread_t tid[300];
+	struct timeval begin, end;
+
+	/* argv[1] is read below, so argc must be at least 2 */
+	if (argc < 2) {
+		printf("./scheduling \n");
+		return -1;
+	}
+
+	gettimeofday(&begin, NULL);
+
+	for (int i = 0; i < 300; i++) {
+		if (atoi(argv[1]) == 0) {
+			pid = pthread_create(&tid[i], NULL, &thread_start, (void *)(long) i);
+			if (pid != 0) {
+				break;
+			}
+		} else {
+			pid = fork();
+			if (pid == 0) {
+				helper(i);
+				break;
+			}
+		}
+	}
+
+	gettimeofday(&end, NULL);
+
+	return 0;
+}
diff --git a/tools/labs/templates/kernel_profiling/3-memory/Makefile b/tools/labs/templates/kernel_profiling/3-memory/Makefile
new file mode 100644
index 00000000000000..c271cbe804498d
--- /dev/null
+++ b/tools/labs/templates/kernel_profiling/3-memory/Makefile
@@ -0,0 +1,9 @@
+CFLAGS=-Wall -m32
+LDFLAGS=-static -m32
+
+memory: memory.o
+
+.PHONY: clean
+
+clean:
+	-rm -f *~ *.o memory
diff --git a/tools/labs/templates/kernel_profiling/3-memory/memory.c b/tools/labs/templates/kernel_profiling/3-memory/memory.c
new file mode 100644
index 00000000000000..f9d5f3ee15c94e
--- /dev/null
+++ b/tools/labs/templates/kernel_profiling/3-memory/memory.c
@@ -0,0 +1,115 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Copy the file argv[3] to argv[4] in blocks of argv[2] bytes.
+ * When argv[1] parses to 0 the copy goes through two shared mmap()
+ * mappings and memcpy(); otherwise it uses pread()/pwrite() with a
+ * heap buffer. Returns 0 on success and -1 on any failure.
+ */
+int main(int argc, char *argv[]) {
+
+	int src_fd, dst_fd, mode, ret = -1;
+	long blk;
+	struct stat st;
+	unsigned long to_write, size, blk_size, off, chunk;
+	char *src_p = NULL, *dst_p = NULL, *buf = NULL;
+
+	/* argv[1]..argv[4] are all read below, so argc must be at least 5 */
+	if (argc < 5) {
+		printf("./memory \n");
+		return -1;
+	}
+
+	mode = atoi(argv[1]);
+	blk = strtol(argv[2], NULL, 10);
+	/* a block size of zero would never advance the copy loop */
+	if (blk <= 0)
+		return -1;
+	blk_size = blk;
+
+	printf("mode %d blk_size %lu src %s dst %s\n",
+			mode, blk_size, argv[3], argv[4]);
+
+	src_fd = open(argv[3], O_RDONLY);
+	if (src_fd < 0)
+		return -1;
+
+	if (fstat(src_fd, &st) < 0)
+		goto out_close_src;
+	size = to_write = st.st_size;
+
+	dst_fd = open(argv[4], O_CREAT | O_RDWR | O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
+	if (dst_fd < 0)
+		goto out_close_src;
+
+	if (ftruncate(dst_fd, size) < 0)
+		goto out_close_dst;
+
+	/* empty source: nothing to copy, and mmap() rejects a zero length */
+	if (size == 0) {
+		ret = 0;
+		goto out_close_dst;
+	}
+
+	if (mode == 0) {
+		/* mmap() reports failure with MAP_FAILED, not a negative pointer */
+		src_p = mmap(NULL, size, PROT_READ, MAP_SHARED, src_fd, 0);
+		if (src_p == MAP_FAILED) {
+			src_p = NULL;
+			goto out;
+		}
+		dst_p = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, dst_fd, 0);
+		if (dst_p == MAP_FAILED) {
+			dst_p = NULL;
+			goto out;
+		}
+	} else {
+		buf = malloc(blk_size);
+		if (buf == NULL)
+			goto out;
+	}
+
+	while (to_write > 0) {
+		/* bytes already copied double as the offset of the next block */
+		off = size - to_write;
+		chunk = to_write < blk_size ? to_write : blk_size;
+
+		if (mode == 0) {
+			memcpy(dst_p + off, src_p + off, chunk);
+		} else {
+			if (pread(src_fd, buf, chunk, off) != (ssize_t) chunk)
+				goto out;
+			if (pwrite(dst_fd, buf, chunk, off) != (ssize_t) chunk)
+				goto out;
+		}
+
+		to_write -= chunk;
+	}
+
+	/* flush the whole destination mapping, starting at its base address */
+	if (mode == 0 && msync(dst_p, size, MS_SYNC) < 0)
+		goto out;
+
+	ret = 0;
+
+out:
+	free(buf);
+	if (dst_p != NULL)
+		munmap(dst_p, size);
+	if (src_p != NULL)
+		munmap(src_p, size);
+out_close_dst:
+	close(dst_fd);
+out_close_src:
+	close(src_fd);
+
+	return ret;
+}
diff --git a/tools/labs/templates/kernel_profiling/4-bio/Kbuild b/tools/labs/templates/kernel_profiling/4-bio/Kbuild
new file mode 100644
index 00000000000000..91daeedbd7aa7c
--- /dev/null
+++ b/tools/labs/templates/kernel_profiling/4-bio/Kbuild
@@ -0,0 +1,3 @@
+EXTRA_CFLAGS = -Wall -g -Wno-unused
+
+obj-m = bio.o
diff --git a/tools/labs/templates/kernel_profiling/4-bio/bio.c b/tools/labs/templates/kernel_profiling/4-bio/bio.c
new file mode 100644
index 00000000000000..ea017d5e218598
--- /dev/null
+++ b/tools/labs/templates/kernel_profiling/4-bio/bio.c
@@ -0,0 +1,110 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+MODULE_AUTHOR("SO2");
+MODULE_DESCRIPTION("Relay disk");
+MODULE_LICENSE("GPL");
+
+#define KERN_LOG_LEVEL KERN_ALERT
+
+#define PHYSICAL_DISK_NAME "/dev/vdb"
+#define KERNEL_SECTOR_SIZE 512
+
+#define MAX_BIO 30000
+#define MAX_THREAD 20
+#define MAX_RUNS 40
+
+
+/* pointer to physical device structure */
+static struct block_device *phys_bdev;
+
+struct bio *bio[MAX_BIO];
+struct page *page[MAX_BIO];
+
+static void alloc_io(struct block_device *bdev)
+{
+	int i;
+
+	for (i = 0; i < MAX_BIO; i++) {
+		bio[i] = bio_alloc(GFP_NOIO, 1);
+		bio[i]->bi_disk = bdev->bd_disk;
+		bio[i]->bi_opf = REQ_OP_READ;
+
+		bio[i]->bi_iter.bi_sector = i;
+		page[i] = alloc_page(GFP_NOIO);
+		bio_add_page(bio[i], page[i], KERNEL_SECTOR_SIZE, 0);
+	}
+}
+
+static struct block_device *open_disk(char *name)
+{
+	struct block_device *bdev;
+
+	bdev = blkdev_get_by_path(name, FMODE_READ | FMODE_WRITE | FMODE_EXCL, THIS_MODULE);
+	if (IS_ERR(bdev)) {
+		printk(KERN_ERR "blkdev_get_by_path\n");
+		return NULL;
+	}
+
+	return bdev;
+}
+
+int my_thread_f(void *data)
+{
+	int part, sec, i, run;
+
+	part = (int)(long) data;	/* data carries the thread index by value */
+	sec = MAX_BIO / MAX_THREAD;	/* bios handled by each thread */
+
+	for (run = 0; run < MAX_RUNS; run++) {
+		for (i = sec * part; i < (part + 1) * sec; i++)
+			submit_bio_wait(bio[i]);
+		msleep(30 * 1000);
+	}
+
+	do_exit(0);
+}
+
+static int __init relay_init(void)
+{
+	int i = 0;
+
+	phys_bdev = open_disk(PHYSICAL_DISK_NAME);
+	if (phys_bdev == NULL) {
+		printk(KERN_ERR "[relay_init] No such device\n");
+		return -EINVAL;
+	}
+
+	alloc_io(phys_bdev);
+
+	for (i = 0; i < MAX_THREAD; i++)	/* kthread_run(fn, data, namefmt, ...) */
+		kthread_run(my_thread_f, (void *)(long)i, "%skwriterd%d", "my", i);
+
+	return 0;
+}
+
+static void close_disk(struct block_device *bdev)
+{
+	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+}
+
+static void __exit relay_exit(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_BIO; i++) {
+		bio_put(bio[i]);
+		__free_page(page[i]);
+	}
+	close_disk(phys_bdev);
+} + +module_init(relay_init); +module_exit(relay_exit); diff --git a/tools/labs/templates/kernel_profiling/5-bad-elf/bad_elf b/tools/labs/templates/kernel_profiling/5-bad-elf/bad_elf new file mode 100755 index 00000000000000..731bd6392b5d05 Binary files /dev/null and b/tools/labs/templates/kernel_profiling/5-bad-elf/bad_elf differ diff --git a/tools/labs/templates/kernel_profiling/bitesize.sh b/tools/labs/templates/kernel_profiling/bitesize.sh new file mode 100755 index 00000000000000..984a7349940ee7 --- /dev/null +++ b/tools/labs/templates/kernel_profiling/bitesize.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# +# bitesize - show disk I/O size as a histogram. +# Written using Linux perf_events (aka "perf"). +# +# This can be used to characterize the distribution of block device I/O +# sizes. To study I/O in more detail, see iosnoop(8). +# +# USAGE: bitesize [-h] [-b buckets] [seconds] +# eg, +# ./bitesize 10 +# +# Run "bitesize -h" for full usage. +# +# REQUIREMENTS: perf_events and block:block_rq_issue tracepoint, which you may +# already have on recent kernels. +# +# This uses multiple counting tracepoints with different filters, one for each +# histogram bucket. While this is summarized in-kernel, the use of multiple +# tracepoints does add additional overhead, which is more evident if you add +# more buckets. In the future this functionality will be available in an +# efficient way in the kernel, and this tool can be rewritten. +# +# From perf-tools: https://github.com/brendangregg/perf-tools +# +# COPYRIGHT: Copyright (c) 2014 Brendan Gregg. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# (http://www.gnu.org/copyleft/gpl.html) +# +# 22-Jul-2014 Brendan Gregg Created this. + +duration=0 +buckets=(1 8 64 128) +secsz=512 +trap ':' INT QUIT TERM PIPE HUP + +function usage { + cat <<-END >&2 + USAGE: bitesize [-h] [-b buckets] [seconds] + -b buckets # specify histogram buckets (Kbytes) + -h # this usage message + eg, + bitesize # trace I/O size until Ctrl-C + bitesize 10 # trace I/O size for 10 seconds + bitesize -b "8 16 32" # specify custom bucket points +END + exit +} + +function die { + echo >&2 "$@" + exit 1 +} + +### process options +while getopts b:h opt +do + case $opt in + b) buckets=($OPTARG) ;; + h|?) usage ;; + esac +done +shift $(( $OPTIND - 1 )) +tpoint=block:block_rq_issue +var=nr_sector +duration=$1 + +### convert buckets (Kbytes) to disk sectors +i=0 +sectors=(${buckets[*]}) +((max_i = ${#buckets[*]} - 1)) +while (( i <= max_i )); do + (( sectors[$i] = ${sectors[$i]} * 1024 / $secsz )) + # avoid negative array index errors for old version bash + if (( i > 0 ));then + if (( ${sectors[$i]} <= ${sectors[$i - 1]} )); then + die "ERROR: bucket list must increase in size." 
+ fi + fi + (( i++ )) +done + +### build list of tracepoints and filters for each histogram bucket +max_b=${buckets[$max_i]} +max_s=${sectors[$max_i]} +tpoints="-e $tpoint --filter \"$var < ${sectors[0]}\"" +awkarray= +i=0 +while (( i < max_i )); do + tpoints="$tpoints -e $tpoint --filter \"$var >= ${sectors[$i]} && " + tpoints="$tpoints $var < ${sectors[$i + 1]}\"" + awkarray="$awkarray buckets[$i]=${buckets[$i]};" + (( i++ )) +done +awkarray="$awkarray buckets[$max_i]=${buckets[$max_i]};" +tpoints="$tpoints -e $tpoint --filter \"$var >= ${sectors[$max_i]}\"" + +### prepare to run +if (( duration )); then + etext="for $duration seconds" + cmd="sleep $duration" +else + etext="until Ctrl-C" + cmd="sleep 999999" +fi +echo "Tracing block I/O size (bytes), $etext..." + +### run perf +out="-o /dev/stdout" # a workaround needed in linux 3.2; not by 3.4.15 +stat=$(eval ./perf stat $tpoints -a $out $cmd 2>&1) + +### find max value for ASCII histogram +most=$(echo "$stat" | awk -v tpoint=$tpoint ' + $2 == tpoint { gsub(/,/, ""); if ($1 > m) { m = $1 } } + END { print m }' +) + +### process output +echo +echo "$stat" | awk -v tpoint=$tpoint -v max_i=$max_i -v most=$most ' + function star(sval, smax, swidth) { + stars = "" + # using int could avoid error on gawk + if (int(smax) == 0) return "" + for (si = 0; si < (swidth * sval / smax); si++) { + stars = stars "#" + } + return stars + } + BEGIN { + '"$awkarray"' + printf(" %-15s: %-8s %s\n", "Kbytes", "I/O", + "Distribution") + } + /Performance counter stats/ { i = -1 } + # reverse order of rule set is important + { ok = 0 } + $2 == tpoint { num = $1; gsub(/,/, "", num); ok = 1 } + ok && i >= max_i { + printf(" %10.1f -> %-10s: %-8s |%-38s|\n", + buckets[i], "", num, star(num, most, 38)) + next + } + ok && i >= 0 && i < max_i { + printf(" %10.1f -> %-10.1f: %-8s |%-38s|\n", + buckets[i], buckets[i+1] - 0.1, num, + star(num, most, 38)) + i++ + next + } + ok && i == -1 { + printf(" %10s -> %-10.1f: %-8s |%-38s|\n", "", + 
buckets[0] - 0.1, num, star(num, most, 38)) + i++ + } +' diff --git a/tools/labs/templates/kernel_profiling/funcgraph.sh b/tools/labs/templates/kernel_profiling/funcgraph.sh new file mode 100755 index 00000000000000..b796e338f17c7f --- /dev/null +++ b/tools/labs/templates/kernel_profiling/funcgraph.sh @@ -0,0 +1,259 @@ +#!/bin/bash +# +# funcgraph - trace kernel function graph, showing child function calls. +# Uses Linux ftrace. +# +# This is an exploratory tool that shows the graph of child function calls +# for a given kernel function. This can cost moderate overhead to execute, and +# should only be used to understand kernel behavior for a given function before +# using other, lower overhead tools. This is a proof of concept using Linux +# ftrace capabilities on older kernels. +# +# USAGE: funcgraph [-aCDhHPtT] [-m maxdepth] [-p PID] [-L TID] [-d secs] funcstring +# +# Run "funcgraph -h" for full usage. +# +# The output format is the same as the ftrace function graph trace format, +# described in the kernel source under Documentation/trace/ftrace.txt. +# Note that the output may be shuffled when different CPU buffers are read; +# check the CPU column for changes, or include timestamps (-t) and post sort. +# +# The "-d duration" mode leaves the trace data in the kernel buffer, and +# only reads it at the end. If the trace data is large, beware of exhausting +# buffer space (/sys/kernel/debug/tracing/buffer_size_kb) and losing data. +# +# Also beware of feedback loops: tracing tcp* functions over an ssh session, +# or writing ext4* functions to an ext4 file system. For the former, tcp +# trace data could be redirected to a file (as in the usage message). For +# the latter, trace to the screen or a different file system. +# +# WARNING: This uses dynamic tracing of kernel functions, and could cause +# kernel panics or freezes. Test, and know what you are doing, before use. +# +# OVERHEADS: This tool causes moderate to high overheads. 
Use with caution for +# exploratory purposes, then switch to lower overhead techniques based on +# findings. It's expected that the kernel will run at least 50% slower while +# this tool is running -- even while no output is being generated. This is +# because ALL kernel functions are traced, and filtered based on the function +# of interest. When output is generated, it can generate many lines quickly +# depending on the traced event. Such data will cause performance overheads. +# This also works without buffering by default, printing function events +# as they happen (uses trace_pipe), context switching and consuming CPU to do +# so. If needed, you can try the "-d secs" option, which buffers events +# instead, reducing overhead. If you think the buffer option is losing events, +# try increasing the buffer size (buffer_size_kb). +# +# From perf-tools: https://github.com/brendangregg/perf-tools +# +# COPYRIGHT: Copyright (c) 2014 Brendan Gregg. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# (http://www.gnu.org/copyleft/gpl.html) +# +# 12-Jul-2014 Brendan Gregg Created this. 
+ +### default variables +tracing=/sys/kernel/debug/tracing +flock=/var/tmp/.ftrace-lock +opt_duration=0; duration=; opt_pid=0; pid=; opt_tid=0; tid=; pidtext= +opt_headers=0; opt_proc=0; opt_time=0; opt_tail=0; opt_nodur=0; opt_cpu=0 +opt_max=0; max=0 +trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section + +function usage { + cat <<-END >&2 + USAGE: funcgraph [-aCDhHPtT] [-m maxdepth] [-p PID] [-L TID] [-d secs] funcstring + -a # all info (same as -HPt) + -C # measure on-CPU time only + -d seconds # trace duration, and use buffers + -D # do not show function duration + -h # this usage message + -H # include column headers + -m maxdepth # max stack depth to show + -p PID # trace when this pid is on-CPU + -L TID # trace when this thread is on-CPU + -P # show process names & PIDs + -t # show timestamps + -T # comment function tails + eg, + funcgraph do_nanosleep # trace do_nanosleep() and children + funcgraph -m 3 do_sys_open # trace do_sys_open() to 3 levels only + funcgraph -a do_sys_open # include timestamps and process name + funcgraph -p 198 do_sys_open # trace vfs_read() for PID 198 only + funcgraph -d 1 do_sys_open >out # trace 1 sec, then write to file + + See the man page and example file for more info. +END + exit +} + +function warn { + if ! eval "$@"; then + echo >&2 "WARNING: command failed \"$@\"" + fi +} + +function end { + # disable tracing + echo 2>/dev/null + echo "Ending tracing..." 
2>/dev/null + cd $tracing + + (( opt_time )) && warn "echo nofuncgraph-abstime > trace_options" + (( opt_proc )) && warn "echo nofuncgraph-proc > trace_options" + (( opt_tail )) && warn "echo nofuncgraph-tail > trace_options" + (( opt_nodur )) && warn "echo funcgraph-duration > trace_options" + (( opt_cpu )) && warn "echo sleep-time > trace_options" + + warn "echo nop > current_tracer" + (( opt_pid || opt_tid )) && warn "echo > set_ftrace_pid" + (( opt_max )) && warn "echo 0 > max_graph_depth" + warn "echo > set_graph_function" + warn "echo > trace" + + (( wroteflock )) && warn "rm $flock" +} + +function die { + echo >&2 "$@" + exit 1 +} + +function edie { + # die with a quiet end() + echo >&2 "$@" + exec >/dev/null 2>&1 + end + exit 1 +} + +### process options +while getopts aCd:DhHm:p:L:PtT opt +do + case $opt in + a) opt_headers=1; opt_proc=1; opt_time=1 ;; + C) opt_cpu=1; ;; + d) opt_duration=1; duration=$OPTARG ;; + D) opt_nodur=1; ;; + m) opt_max=1; max=$OPTARG ;; + p) opt_pid=1; pid=$OPTARG ;; + L) opt_tid=1; tid=$OPTARG ;; + H) opt_headers=1; ;; + P) opt_proc=1; ;; + t) opt_time=1; ;; + T) opt_tail=1; ;; + h|?) usage ;; + esac +done +shift $(( $OPTIND - 1 )) + +### option logic +(( $# == 0 )) && usage +(( opt_pid && opt_tid )) && edie "ERROR: You can use -p or -L but not both." +funcs="$1" +(( opt_pid )) && pidtext=" for PID $pid" +(( opt_tid )) && pidtext=" for TID $tid" +if (( opt_duration )); then + echo "Tracing \"$funcs\"$pidtext for $duration seconds..." +else + echo "Tracing \"$funcs\"$pidtext... Ctrl-C to end." +fi + +### check permissions +cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? + debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" + +### ftrace lock +[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" +echo $$ > $flock || die "ERROR: unable to write $flock." 
+wroteflock=1 + +### setup and commence tracing +sysctl -q kernel.ftrace_enabled=1 # doesn't set exit status +read mode < current_tracer +[[ "$mode" != "nop" ]] && edie "ERROR: ftrace active (current_tracer=$mode)" +if (( opt_max )); then + if ! echo $max > max_graph_depth; then + edie "ERROR: setting -m $max. Older kernel version? Exiting." + fi +fi +if (( opt_pid )); then + echo > set_ftrace_pid + # ftrace expects kernel pids, which are thread ids + for tid in /proc/$pid/task/*; do + if ! echo ${tid##*/} >> set_ftrace_pid; then + edie "ERROR: setting -p $pid (PID exist?). Exiting." + fi + done +fi +if (( opt_tid )); then + if ! echo $tid > set_ftrace_pid; then + edie "ERROR: setting -L $tid (TID exist?). Exiting." + fi +fi +if ! echo > set_ftrace_filter; then + edie "ERROR: writing to set_ftrace_filter. Exiting." +fi +if ! echo "$funcs" > set_graph_function; then + edie "ERROR: enabling \"$funcs\". Exiting." +fi +if ! echo function_graph > current_tracer; then + edie "ERROR: setting current_tracer to \"function\". Exiting." +fi +if (( opt_cpu )); then + if ! echo nosleep-time > trace_options; then + edie "ERROR: setting -C (nosleep-time). Exiting." + fi +fi +# the following must be done after setting current_tracer +if (( opt_time )); then + if ! echo funcgraph-abstime > trace_options; then + edie "ERROR: setting -t (funcgraph-abstime). Exiting." + fi +fi +if (( opt_proc )); then + if ! echo funcgraph-proc > trace_options; then + edie "ERROR: setting -P (funcgraph-proc). Exiting." + fi +fi +if (( opt_tail )); then + if ! echo funcgraph-tail > trace_options; then + edie "ERROR: setting -T (funcgraph-tail). Old kernel? Exiting." + fi +fi +if (( opt_nodur )); then + if ! echo nofuncgraph-duration > trace_options; then + edie "ERROR: setting -D (nofuncgraph-duration). Exiting." 
+ fi +fi + +### print trace buffer +warn "echo > trace" +if (( opt_duration )); then + sleep $duration + if (( opt_headers )); then + cat trace + else + grep -v '^#' trace + fi +else + # trace_pipe lack headers, so fetch them from trace + (( opt_headers )) && cat trace + cat trace_pipe +fi + +### end tracing +end diff --git a/tools/labs/templates/kernel_profiling/iolatency.sh b/tools/labs/templates/kernel_profiling/iolatency.sh new file mode 100755 index 00000000000000..d646193ea604a7 --- /dev/null +++ b/tools/labs/templates/kernel_profiling/iolatency.sh @@ -0,0 +1,296 @@ +#!/bin/bash +# +# iolatency - summarize block device I/O latency as a histogram. +# Written using Linux ftrace. +# +# This shows the distribution of latency, allowing modes and latency outliers +# to be identified and studied. +# +# USAGE: ./iolatency [-hQT] [-d device] [-i iotype] [interval [count]] +# +# REQUIREMENTS: FTRACE CONFIG and block:block_rq_* tracepoints, which you may +# already have on recent kernels. +# +# OVERHEAD: block device I/O issue and completion events are traced and buffered +# in-kernel, then processed and summarized in user space. There may be +# measurable overhead with this approach, relative to the block device IOPS. +# +# This was written as a proof of concept for ftrace. +# +# From perf-tools: https://github.com/brendangregg/perf-tools +# +# COPYRIGHT: Copyright (c) 2014 Brendan Gregg. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# (http://www.gnu.org/copyleft/gpl.html) +# +# 20-Jul-2014 Brendan Gregg Created this. + +### default variables +tracing=/sys/kernel/debug/tracing +flock=/var/tmp/.ftrace-lock +bufsize_kb=4096 +opt_device=0; device=; opt_iotype=0; iotype=; opt_timestamp=0 +opt_interval=0; interval=1; opt_count=0; count=0; opt_queue=0 +trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section + +function usage { + cat <<-END >&2 + USAGE: iolatency [-hQT] [-d device] [-i iotype] [interval [count]] + -d device # device string (eg, "202,1") + -i iotype # match type (eg, '*R*' for all reads) + -Q # use queue insert as start time + -T # timestamp on output + -h # this usage message + interval # summary interval, seconds (default 1) + count # number of summaries + eg, + iolatency # summarize latency every second + iolatency -Q # include block I/O queue time + iolatency 5 2 # 2 x 5 second summaries + iolatency -i '*R*' # trace reads + iolatency -d 202,1 # trace device 202,1 only + + See the man page and example file for more info. +END + exit +} + +function warn { + if ! eval "$@"; then + echo >&2 "WARNING: command failed \"$@\"" + fi +} + +function end { + # disable tracing + echo 2>/dev/null + echo "Ending tracing..." 
2>/dev/null + cd $tracing + warn "echo 0 > events/block/$b_start/enable" + warn "echo 0 > events/block/block_rq_complete/enable" + if (( opt_device || opt_iotype )); then + warn "echo 0 > events/block/$b_start/filter" + warn "echo 0 > events/block/block_rq_complete/filter" + fi + warn "echo > trace" + (( wroteflock )) && warn "rm $flock" +} + +function die { + echo >&2 "$@" + exit 1 +} + +function edie { + # die with a quiet end() + echo >&2 "$@" + exec >/dev/null 2>&1 + end + exit 1 +} + +### process options +while getopts d:hi:QT opt +do + case $opt in + d) opt_device=1; device=$OPTARG ;; + i) opt_iotype=1; iotype=$OPTARG ;; + Q) opt_queue=1 ;; + T) opt_timestamp=1 ;; + h|?) usage ;; + esac +done +shift $(( $OPTIND - 1 )) +if (( $# )); then + opt_interval=1 + interval=$1 + shift +fi +if (( $# )); then + opt_count=1 + count=$1 +fi +if (( opt_device )); then + major=${device%,*} + minor=${device#*,} + dev=$(( (major << 20) + minor )) +fi +if (( opt_queue )); then + b_start=block_rq_insert +else + b_start=block_rq_issue +fi + +### select awk +[[ -x /usr/bin/mawk ]] && awk='mawk -W interactive' || awk=awk + +### check permissions +cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? + debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" + +### ftrace lock +[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" +echo $$ > $flock || die "ERROR: unable to write $flock." +wroteflock=1 + +### setup and begin tracing +warn "echo nop > current_tracer" +warn "echo $bufsize_kb > buffer_size_kb" +filter= +if (( opt_iotype )); then + filter="rwbs ~ \"$iotype\"" +fi +if (( opt_device )); then + [[ "$filter" != "" ]] && filter="$filter && " + filter="${filter}dev == $dev" +fi +if (( opt_iotype || opt_device )); then + if ! echo "$filter" > events/block/$b_start/filter || \ + ! echo "$filter" > events/block/block_rq_complete/filter + then + edie "ERROR: setting -d or -i filter. Exiting." + fi +fi +if ! 
echo 1 > events/block/$b_start/enable || \ + ! echo 1 > events/block/block_rq_complete/enable; then + edie "ERROR: enabling block I/O tracepoints. Exiting." +fi +etext= +(( !opt_count )) && etext=" Ctrl-C to end." +echo "Tracing block I/O. Output every $interval seconds.$etext" + +# +# Determine output format. It may be one of the following (newest first): +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# TASK-PID CPU# TIMESTAMP FUNCTION +# To differentiate between them, the number of header fields is counted, +# and an offset set, to skip the extra column when needed. +# +offset=$($awk 'BEGIN { o = 0; } + $1 == "#" && $2 ~ /TASK/ && NF == 6 { o = 1; } + $2 ~ /TASK/ { print o; exit }' trace) + +### print trace buffer +warn "echo > trace" +i=0 +while (( !opt_count || (i < count) )); do + (( i++ )) + sleep $interval + + # snapshots were added in 3.10 + if [[ -x snapshot ]]; then + echo 1 > snapshot + echo > trace + cat snapshot + else + cat trace + echo > trace + fi + + (( opt_timestamp )) && printf "time %(%H:%M:%S)T:\n" -1 + echo "tick" +done | \ +$awk -v o=$offset -v opt_timestamp=$opt_timestamp -v b_start=$b_start ' + function star(sval, smax, swidth) { + stars = "" + if (smax == 0) return "" + for (si = 0; si < (swidth * sval / smax); si++) { + stars = stars "#" + } + return stars + } + + BEGIN { max_i = 0 } + + # common fields + $1 != "#" { + time = $(3+o); sub(":", "", time) + dev = $(5+o) + } + + # block I/O request + $1 != "#" && $0 ~ b_start { + # + # example: (fields1..4+o) 202,1 W 0 () 12862264 + 8 [tar] + # The cmd field "()" might contain multiple words (hex), + # hence stepping from the right (NF-3). 
+ # + loc = $(NF-3) + starts[dev, loc] = time + next + } + + # block I/O completion + $1 != "#" && $0 ~ /rq_complete/ { + # + # example: (fields1..4+o) 202,1 W () 12862256 + 8 [0] + # + dir = $(6+o) + loc = $(NF-3) + + if (starts[dev, loc] > 0) { + latency_ms = 1000 * (time - starts[dev, loc]) + i = 0 + for (ms = 1; latency_ms > ms; ms *= 2) { i++ } + hist[i]++ + if (i > max_i) + max_i = i + delete starts[dev, loc] + } + next + } + + # timestamp + $1 == "time" { + lasttime = $2 + } + + # print summary + $1 == "tick" { + print "" + if (opt_timestamp) + print lasttime + + # find max value + max_v = 0 + for (i = 0; i <= max_i; i++) { + if (hist[i] > max_v) + max_v = hist[i] + } + + # print histogram + printf "%8s .. %-8s: %-8s |%-38s|\n", ">=(ms)", "<(ms)", + "I/O", "Distribution" + ms = 1 + from = 0 + for (i = 0; i <= max_i; i++) { + printf "%8d -> %-8d: %-8d |%-38s|\n", from, ms, + hist[i], star(hist[i], max_v, 38) + from = ms + ms *= 2 + } + fflush() + delete hist + delete starts # invalid if events missed between snapshots + max_i = 0 + } + + $0 ~ /LOST.*EVENTS/ { print "WARNING: " $0 > "/dev/stderr" } +' + +### end tracing +end diff --git a/tools/labs/templates/kernel_profiling/iosnoop.sh b/tools/labs/templates/kernel_profiling/iosnoop.sh new file mode 100755 index 00000000000000..603b3e83f9b944 --- /dev/null +++ b/tools/labs/templates/kernel_profiling/iosnoop.sh @@ -0,0 +1,296 @@ +#!/bin/bash +# +# iosnoop - trace block device I/O. +# Written using Linux ftrace. +# +# This traces disk I/O at the block device interface, using the block: +# tracepoints. This can help characterize the I/O requested for the storage +# devices and their resulting performance. I/O completions can also be studied +# event-by-event for debugging disk and controller I/O scheduling issues. +# +# USAGE: ./iosnoop [-hQst] [-d device] [-i iotype] [-p pid] [-n name] [duration] +# +# Run "iosnoop -h" for full usage. 
+# +# REQUIREMENTS: FTRACE CONFIG, block:block_rq_* tracepoints (you may +# already have these on recent kernels). +# +# OVERHEAD: By default, iosnoop works without buffering, printing I/O events +# as they happen (uses trace_pipe), context switching and consuming CPU to do +# so. This has a limit of about 10,000 IOPS (depending on your platform), at +# which point iosnoop will be consuming 1 CPU. The duration mode uses buffering, +# and can handle much higher IOPS rates, however, the buffer has a limit of +# about 50,000 I/O, after which events will be dropped. You can tune this with +# bufsize_kb, which is per-CPU. Also note that the "-n" option is currently +# post-filtered, so all events are traced. +# +# This was written as a proof of concept for ftrace. It would be better written +# using perf_events (after some capabilities are added), which has a better +# buffering policy, or a tracer such as SystemTap or ktap. +# +# From perf-tools: https://github.com/brendangregg/perf-tools +# +# See the iosnoop(8) man page (in perf-tools) for more info. +# +# COPYRIGHT: Copyright (c) 2014 Brendan Gregg. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# (http://www.gnu.org/copyleft/gpl.html) +# +# 12-Jul-2014 Brendan Gregg Created this. 
+ +### default variables +tracing=/sys/kernel/debug/tracing +flock=/var/tmp/.ftrace-lock +bufsize_kb=4096 +opt_duration=0; duration=; opt_name=0; name=; opt_pid=0; pid=; ftext= +opt_start=0; opt_end=0; opt_device=0; device=; opt_iotype=0; iotype= +opt_queue=0 +trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section + +function usage { + cat <<-END >&2 + USAGE: iosnoop [-hQst] [-d device] [-i iotype] [-p PID] [-n name] + [duration] + -d device # device string (eg, "202,1") + -i iotype # match type (eg, '*R*' for all reads) + -n name # process name to match on I/O issue + -p PID # PID to match on I/O issue + -Q # use queue insert as start time + -s # include start time of I/O (s) + -t # include completion time of I/O (s) + -h # this usage message + duration # duration seconds, and use buffers + eg, + iosnoop # watch block I/O live (unbuffered) + iosnoop 1 # trace 1 sec (buffered) + iosnoop -Q # include queueing time in LATms + iosnoop -ts # include start and end timestamps + iosnoop -i '*R*' # trace reads + iosnoop -p 91 # show I/O issued when PID 91 is on-CPU + iosnoop -Qp 91 # show I/O queued by PID 91, queue time + + See the man page and example file for more info. +END + exit +} + +function warn { + if ! eval "$@"; then + echo >&2 "WARNING: command failed \"$@\"" + fi +} + +function end { + # disable tracing + echo 2>/dev/null + echo "Ending tracing..." 
2>/dev/null + cd $tracing + warn "echo 0 > events/block/$b_start/enable" + warn "echo 0 > events/block/block_rq_complete/enable" + if (( opt_device || opt_iotype || opt_pid )); then + warn "echo 0 > events/block/$b_start/filter" + warn "echo 0 > events/block/block_rq_complete/filter" + fi + warn "echo > trace" + (( wroteflock )) && warn "rm $flock" +} + +function die { + echo >&2 "$@" + exit 1 +} + +function edie { + # die with a quiet end() + echo >&2 "$@" + exec >/dev/null 2>&1 + end + exit 1 +} + +### process options +while getopts d:hi:n:p:Qst opt +do + case $opt in + d) opt_device=1; device=$OPTARG ;; + i) opt_iotype=1; iotype=$OPTARG ;; + n) opt_name=1; name=$OPTARG ;; + p) opt_pid=1; pid=$OPTARG ;; + Q) opt_queue=1 ;; + s) opt_start=1 ;; + t) opt_end=1 ;; + h|?) usage ;; + esac +done +shift $(( $OPTIND - 1 )) +if (( $# )); then + opt_duration=1 + duration=$1 + shift +fi +if (( opt_device )); then + major=${device%,*} + minor=${device#*,} + dev=$(( (major << 20) + minor )) +fi + +### option logic +(( opt_pid && opt_name )) && die "ERROR: use either -p or -n." +(( opt_pid )) && ftext=" issued by PID $pid" +(( opt_name )) && ftext=" issued by process name \"$name\"" +if (( opt_duration )); then + echo "Tracing block I/O$ftext for $duration seconds (buffered)..." +else + echo "Tracing block I/O$ftext. Ctrl-C to end." +fi +if (( opt_queue )); then + b_start=block_rq_insert +else + b_start=block_rq_issue +fi + +### select awk +(( opt_duration )) && use=mawk || use=gawk # workaround for mawk fflush() +[[ -x /usr/bin/$use ]] && awk=$use || awk=awk + +### check permissions +cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? + debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" + +### ftrace lock +[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" +echo $$ > $flock || die "ERROR: unable to write $flock." +wroteflock=1 # set only after the lock file is written, so end() never removes a lock we do not own
+ +### setup and begin tracing +echo nop > current_tracer +warn "echo $bufsize_kb > buffer_size_kb" +filter= +if (( opt_iotype )); then + filter="rwbs ~ \"$iotype\"" +fi +if (( opt_device )); then + [[ "$filter" != "" ]] && filter="$filter && " + filter="${filter}dev == $dev" +fi +filter_i=$filter +if (( opt_pid )); then + [[ "$filter_i" != "" ]] && filter_i="$filter_i && " + filter_i="${filter_i}common_pid == $pid" + [[ "$filter" == "" ]] && filter=0 +fi +if (( opt_iotype || opt_device || opt_pid )); then + if ! echo "$filter_i" > events/block/$b_start/filter || \ + ! echo "$filter" > events/block/block_rq_complete/filter + then + edie "ERROR: setting -d, -i or -p filter. Exiting." + fi +fi +if ! echo 1 > events/block/$b_start/enable || \ + ! echo 1 > events/block/block_rq_complete/enable; then + edie "ERROR: enabling block I/O tracepoints. Exiting." +fi +(( opt_start )) && printf "%-15s " "STARTs" +(( opt_end )) && printf "%-15s " "ENDs" +printf "%-12.12s %-6s %-4s %-8s %-12s %-6s %8s\n" \ + "COMM" "PID" "TYPE" "DEV" "BLOCK" "BYTES" "LATms" + +# +# Determine output format. It may be one of the following (newest first): +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# TASK-PID CPU# TIMESTAMP FUNCTION +# To differentiate between them, the number of header fields is counted, +# and an offset set, to skip the extra column when needed. 
+# +offset=$($awk 'BEGIN { o = 0; } + $1 == "#" && $2 ~ /TASK/ && NF == 6 { o = 1; } + $2 ~ /TASK/ { print o; exit }' trace) + +### print trace buffer +warn "echo > trace" +( if (( opt_duration )); then + # wait then dump buffer + sleep $duration + cat trace +else + # print buffer live + cat trace_pipe +fi ) | $awk -v o=$offset -v opt_name=$opt_name -v name="$name" \ + -v opt_duration=$opt_duration -v opt_start=$opt_start -v opt_end=$opt_end \ + -v b_start=$b_start ' + # common fields + $1 != "#" { + # task name can contain dashes, so strip only the trailing -PID + comm = pid = $1 + sub(/-[0-9][0-9]*$/, "", comm) + sub(/.*-/, "", pid) + time = $(3+o); sub(":", "", time) + dev = $(5+o) + } + + # block I/O request + $1 != "#" && $0 ~ b_start { + if (opt_name && match(comm, name) == 0) + next + # + # example: (fields1..4+o) 202,1 W 0 () 12862264 + 8 [tar] + # The cmd field "()" might contain multiple words (hex), + # hence stepping from the right (NF-3). + # + loc = $(NF-3) + starts[dev, loc] = time + comms[dev, loc] = comm + pids[dev, loc] = pid + next + } + + # block I/O completion + $1 != "#" && $0 ~ /rq_complete/ { + # + # example: (fields1..4+o) 202,1 W () 12862256 + 8 [0] + # + dir = $(6+o) + loc = $(NF-3) + nsec = $(NF-1) + + if (starts[dev, loc] > 0) { + latency = sprintf("%.2f", + 1000 * (time - starts[dev, loc])) + comm = comms[dev, loc] + pid = pids[dev, loc] + + if (opt_start) + printf "%-15s ", starts[dev, loc] + if (opt_end) + printf "%-15s ", time + printf "%-12.12s %-6s %-4s %-8s %-12s %-6s %8s\n", + comm, pid, dir, dev, loc, nsec * 512, latency + if (!opt_duration) + fflush() + + delete starts[dev, loc] + delete comms[dev, loc] + delete pids[dev, loc] + } + next + } + + $0 ~ /LOST.*EVENTS/ { print "WARNING: " $0 > "/dev/stderr" } +' + +### end tracing +end diff --git a/tools/labs/templates/kernel_profiling/kprobe.sh b/tools/labs/templates/kernel_profiling/kprobe.sh new file mode 100755 index 00000000000000..b4523d88286f0d --- /dev/null +++ 
b/tools/labs/templates/kernel_profiling/kprobe.sh @@ -0,0 +1,270 @@ +#!/bin/bash +# +# kprobe - trace a given kprobe definition. Kernel dynamic tracing. +# Written using Linux ftrace. +# +# This will create, trace, then destroy a given kprobe definition. See +# Documentation/trace/kprobetrace.txt in the Linux kernel source for the +# syntax of a kprobe definition, and "kprobe -h" for examples. With this tool, +# the probe alias is optional (it defaults to the traced function's name if not +# specified). +# +# USAGE: ./kprobe [-FhHsv] [-d secs] [-p pid] [-L tid] kprobe_definition [filter] +# +# Run "kprobe -h" for full usage. +# +# I wrote this because I kept testing different custom kprobes at the command +# line, and wanted a way to automate the steps. +# +# WARNING: This uses dynamic tracing of kernel functions, and could cause +# kernel panics or freezes, depending on the function traced. Test in a lab +# environment, and know what you are doing, before use. +# +# REQUIREMENTS: FTRACE and KPROBE CONFIG, which you may already have on recent +# kernel versions. +# +# From perf-tools: https://github.com/brendangregg/perf-tools +# +# See the kprobe(8) man page (in perf-tools) for more info. +# +# COPYRIGHT: Copyright (c) 2014 Brendan Gregg. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+# +# (http://www.gnu.org/copyleft/gpl.html) +# +# 22-Jul-2014 Brendan Gregg Created this. + +### default variables +tracing=/sys/kernel/debug/tracing +flock=/var/tmp/.ftrace-lock; wroteflock=0 +opt_duration=0; duration=; opt_pid=0; pid=; opt_tid=0; tid= +opt_filter=0; filter=; opt_view=0; opt_headers=0; opt_stack=0; dmesg=2 +debug=0; opt_force=0 +trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section + +function usage { + cat <<-END >&2 + USAGE: kprobe [-FhHsv] [-d secs] [-p PID] [-L TID] kprobe_definition [filter] + -F # force. trace despite warnings. + -d seconds # trace duration, and use buffers + -p PID # PID to match on events + -L TID # thread id to match on events + -v # view format file (don't trace) + -H # include column headers + -s # show kernel stack traces + -h # this usage message + + Note that these examples may need modification to match your kernel + version's function names and platform's register usage. + eg, + kprobe p:do_sys_open + # trace open() entry + kprobe r:do_sys_open + # trace open() return + kprobe 'r:do_sys_open \$retval' + # trace open() return value + kprobe 'r:myopen do_sys_open \$retval' + # use a custom probe name + kprobe 'p:myopen do_sys_open mode=%cx:u16' + # trace open() file mode + kprobe 'p:myopen do_sys_open filename=+0(%si):string' + # trace open() with filename + kprobe -s 'p:myprobe tcp_retransmit_skb' + # show kernel stacks + kprobe 'p:do_sys_open file=+0(%si):string' 'file ~ "*stat"' + # opened files ending in "stat" + + See the man page and example file for more info. +END + exit +} + +function warn { + if ! eval "$@"; then + echo >&2 "WARNING: command failed \"$@\"" + fi +} + +function end { + # disable tracing + echo 2>/dev/null + echo "Ending tracing..." 
2>/dev/null + cd $tracing + warn "echo 0 > events/kprobes/$kname/enable" + if (( opt_filter )); then + warn "echo 0 > events/kprobes/$kname/filter" + fi + warn "echo -:$kname >> kprobe_events" + (( opt_stack )) && warn "echo 0 > options/stacktrace" + warn "echo > trace" + (( wroteflock )) && warn "rm $flock" +} + +function die { + echo >&2 "$@" + exit 1 +} + +function edie { + # die with a quiet end() + echo >&2 "$@" + exec >/dev/null 2>&1 + end + exit 1 +} + +### process options +while getopts Fd:hHp:L:sv opt +do + case $opt in + F) opt_force=1 ;; + d) opt_duration=1; duration=$OPTARG ;; + p) opt_pid=1; pid=$OPTARG ;; + L) opt_tid=1; tid=$OPTARG ;; + H) opt_headers=1 ;; + s) opt_stack=1 ;; + v) opt_view=1 ;; + h|?) usage ;; + esac +done +shift $(( $OPTIND - 1 )) +(( $# )) || usage +kprobe=$1 +shift +if (( $# )); then + opt_filter=1 + filter=$1 +fi + +### option logic +(( opt_pid + opt_filter + opt_tid > 1 )) && \ + die "ERROR: use at most one of -p, -L, or filter." +(( opt_duration && opt_view )) && die "ERROR: use either -d or -v." +if (( opt_pid )); then + # convert to filter + opt_filter=1 + # ftrace common_pid is thread id from user's perspective + for tid in /proc/$pid/task/*; do + filter="$filter || common_pid == ${tid##*/}" + done + filter=${filter:4} # trim leading ' || ' (four characters) +fi +if (( opt_tid )); then + opt_filter=1 + filter="common_pid == $tid" +fi +if [[ "$kprobe" != p:* && "$kprobe" != r:* ]]; then + echo >&2 "ERROR: invalid kprobe definition (should start with p: or r:)" + usage +fi +# +# parse the following: +# r:do_sys_open +# r:my_sys_open do_sys_open +# r:do_sys_open %ax +# r:do_sys_open $retval %ax +# r:my_sys_open do_sys_open $retval %ax +# r:do_sys_open rval=$retval +# r:my_sys_open do_sys_open rval=$retval +# r:my_sys_open do_sys_open rval=$retval %ax +# ... 
and examples from USAGE message +# +krest=${kprobe#*:} +kname=${krest%% *} +set -- $krest +if [[ $2 == "" || $2 == *[=%\$]* ]]; then + # if probe name unspecified, default to function name + ktype=${kprobe%%:*} + kprobe="$ktype:$kname $krest" +fi +if (( debug )); then + echo "kname: $kname, kprobe: $kprobe" +fi + +### check permissions +cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? + debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" + +## check function +set -- $kprobe +fname=$2 +if (( !opt_force )) && ! grep -w $fname available_filter_functions >/dev/null \ + 2>&1 +then + echo >&2 "ERROR: func $fname not in $PWD/available_filter_functions." + printf >&2 "Either it doesn't exist, or, it might be unsafe to kprobe. " + echo >&2 "Exiting. Use -F to override." + exit 1 +fi + +if (( !opt_view )); then + if (( opt_duration )); then + echo "Tracing kprobe $kname for $duration seconds (buffered)..." + else + echo "Tracing kprobe $kname. Ctrl-C to end." + fi +fi + +### ftrace lock +[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" +echo $$ > $flock || die "ERROR: unable to write $flock." +wroteflock=1 + +### setup and begin tracing +echo nop > current_tracer +if ! echo "$kprobe" >> kprobe_events; then + echo >&2 "ERROR: adding kprobe \"$kprobe\"." + if (( dmesg )); then + echo >&2 "Last $dmesg dmesg entries (might contain reason):" + dmesg | tail -$dmesg | sed 's/^/ /' + fi + edie "Exiting." +fi +if (( opt_view )); then + cat events/kprobes/$kname/format + edie "" +fi +if (( opt_filter )); then + if ! echo "$filter" > events/kprobes/$kname/filter; then + edie "ERROR: setting filter or -p. Exiting." + fi +fi +if (( opt_stack )); then + if ! echo 1 > options/stacktrace; then + edie "ERROR: enabling stack traces (-s). Exiting" + fi +fi +if ! echo 1 > events/kprobes/$kname/enable; then + edie "ERROR: enabling kprobe $kname. Exiting." 
+fi + +### print trace buffer +warn "echo > trace" +if (( opt_duration )); then + sleep $duration + if (( opt_headers )); then + cat trace + else + grep -v '^#' trace + fi +else + # trace_pipe lacks headers, so fetch them from trace + (( opt_headers )) && cat trace + cat trace_pipe +fi + +### end tracing +end diff --git a/tools/labs/templates/kernel_profiling/syscount.sh b/tools/labs/templates/kernel_profiling/syscount.sh new file mode 100755 index 00000000000000..7cf148e154e7a9 --- /dev/null +++ b/tools/labs/templates/kernel_profiling/syscount.sh @@ -0,0 +1,192 @@ +#!/bin/bash +# +# syscount - count system calls. +# Written using Linux perf_events (aka "perf"). +# +# This is a proof-of-concept using perf_events capabilities for older kernel +# versions, that lack custom in-kernel aggregations. Once they exist, this +# script can be substantially rewritten and improved (lower overhead). +# +# USAGE: syscount [-chv] [-t top] {-p PID|-d seconds|command} +# +# Run "syscount -h" for full usage. +# +# REQUIREMENTS: Linux perf_events: add linux-tools-common, run "perf", then +# add any additional packages it requests. Also needs awk. +# +# OVERHEADS: Modes that report syscall names only (-c, -cp PID, -cd secs) have +# lower overhead, since they use in-kernel counts. Other modes which report +# process IDs (-cv) or process names (default) create a perf.data file for +# post processing, and you will see messages about it doing this. Beware of +# the file size (test for short durations, or use -c to see counts based on +# in-kernel counters), and gauge overheads based on the perf.data size. +# +# Note that this script deliberately does not pipe perf record into +# perf script, which would avoid perf.data, because it can create a feedback +# loop where the perf script syscalls are recorded. 
Hopefully there will be a +# fix for this in a later perf version, so perf.data can be skipped, or other +# kernel features to aggregate by process name in-kernel directly (eg, via +# eBPF, ktap, or SystemTap). +# +# From perf-tools: https://github.com/brendangregg/perf-tools +# +# See the syscount(8) man page (in perf-tools) for more info. +# +# COPYRIGHT: Copyright (c) 2014 Brendan Gregg. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# (http://www.gnu.org/copyleft/gpl.html) +# +# 07-Jul-2014 Brendan Gregg Created this. + +# default variables +opt_count=0; opt_pid=0; opt_verbose=0; opt_cmd=0; opt_duration=0; opt_tail=0 +tnum=; pid=; duration=; cmd=; cpus=-a; opts=; tcmd=cat; ttext= +trap '' INT QUIT TERM PIPE HUP + +stdout_workaround=1 # needed for older perf versions +write_workaround=1 # needed for perf versions that trace their own writes + +### parse options +while getopts cd:hp:t:v opt +do + case $opt in + c) opt_count=1 ;; + d) opt_duration=1; duration=$OPTARG ;; + p) opt_pid=1; pid=$OPTARG ;; + t) opt_tail=1; tnum=$OPTARG ;; + v) opt_verbose=1 ;; + h|?) 
cat <<-END >&2 + USAGE: syscount [-chv] [-t top] {-p PID|-d seconds|command} + syscount # count by process name + -c # show counts by syscall name + -h # this usage message + -v # verbose: shows PID + -p PID # trace this PID only + -d seconds # duration of trace + -t num # show top number only + command # run and trace this command + eg, + syscount # syscalls by process name + syscount -c # syscalls by syscall name + syscount -d 5 # trace for 5 seconds + syscount -cp 923 # syscall names for PID 923 + syscount -c ls # syscall names for "ls" + + See the man page and example file for more info. + END + exit 1 + esac +done +shift $(( $OPTIND - 1 )) + +### option logic +if (( $# > 0 )); then + opt_cmd=1 + cmd="$@" + cpus= +fi +if (( opt_pid + opt_duration + opt_cmd > 1 )); then + echo >&2 "ERROR: Pick one of {-p PID|-d seconds|command}" + exit 1 +fi +if (( opt_tail )); then + tcmd="tail -$tnum" + ttext=" Top $tnum only." +fi +if (( opt_duration )); then + cmd="sleep $duration" + echo "Tracing for $duration seconds.$ttext.." +fi +if (( opt_pid )); then + cpus= + cmd="-p $pid" + echo "Tracing PID $pid.$ttext.. Ctrl-C to end." +fi +(( opt_cmd )) && echo "Tracing while running: \"$cmd\".$ttext.." +(( opt_pid + opt_duration + opt_cmd == 0 )) && \ + echo "Tracing.$ttext.. Ctrl-C to end." +(( stdout_workaround )) && opts="-o /dev/stdout" + +ulimit -n 32768 # often needed + +### execute syscall name mode +if (( opt_count && ! 
opt_verbose )); then + : ${cmd:=sleep 999999} + out=$(./perf stat $opts -e 'syscalls:sys_enter_*' $cpus $cmd) + printf "%-17s %8s\n" "SYSCALL" "COUNT" + echo "$out" | awk ' + $1 && $2 ~ /syscalls:/ { + sub("syscalls:sys_enter_", ""); sub(":", "") + gsub(",", "") + printf "%-17s %8s\n", $2, $1 + }' | sort -n -k2 | $tcmd + exit +fi + +### execute syscall name with pid mode +if (( opt_count && opt_verbose )); then + if (( write_workaround )); then + # this list must end in write to associate the filter + tp=$(./perf list 'syscalls:sys_enter_*' | awk ' + $1 != "syscalls:sys_enter_write" && $1 ~ /syscalls:/ { printf "-e %s ", $1 }') + tp="$tp -e syscalls:sys_enter_write" + sh -c "./perf record $tp --filter 'common_pid != '\$\$ $cpus $cmd" + else + ./perf record -e 'syscalls:sys_enter_*' $cpus $cmd + # could also pipe direct to perf script + fi + + printf "%-6s %-16s %-17s %8s\n" "PID" "COMM" "SYSCALL" "COUNT" + ./perf script --fields pid,comm,event | awk '$1 != "#" { + sub("syscalls:sys_enter_", ""); sub(":", "") + a[$1 ";" $2 ";" $3]++ + } + END { + for (k in a) { + split(k, b, ";"); + printf "%-6s %-16s %-17s %8d\n", b[2], b[1], b[3], a[k] + } + }' | sort -n -k4 | $tcmd + exit +fi + +### execute process name mode +tp="-e raw_syscalls:sys_enter" +if (( write_workaround )); then + sh -c "./perf record $tp --filter 'common_pid != '\$\$ $cpus $cmd" +else + ./perf record $tp $cpus $cmd +fi + +if (( opt_verbose )); then + printf "%-6s %-16s %8s\n" "PID" "COMM" "COUNT" + ./perf script --fields pid,comm | awk '$1 != "#" { a[$1 ";" $2]++ } + END { + for (k in a) { + split(k, b, ";"); + printf "%-6s %-16s %8d\n", b[2], b[1], a[k] + } + }' | sort -n -k3 | $tcmd +else + printf "%-16s %8s\n" "COMM" "COUNT" + ./perf script --fields comm | awk '$1 != "#" { a[$1]++ } + END { + for (k in a) { + printf "%-16s %8d\n", k, a[k] + } + }' | sort -n -k2 | $tcmd +fi diff --git a/tools/labs/templates/memory_mapping/kmmap/Kbuild b/tools/labs/templates/memory_mapping/kmmap/Kbuild new file mode 100644 
index 00000000000000..3df7ecec2cab89 --- /dev/null +++ b/tools/labs/templates/memory_mapping/kmmap/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m := kmmap.o diff --git a/tools/labs/templates/memory_mapping/kmmap/kmmap.c b/tools/labs/templates/memory_mapping/kmmap/kmmap.c new file mode 100644 index 00000000000000..070786f8b6184f --- /dev/null +++ b/tools/labs/templates/memory_mapping/kmmap/kmmap.c @@ -0,0 +1,229 @@ +/* + * PSO - Memory Mapping Lab(#11) + * + * Exercise #1: memory mapping using kmalloc'd kernel areas + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../test/mmap-test.h" + +MODULE_DESCRIPTION("simple mmap driver"); +MODULE_AUTHOR("PSO"); +MODULE_LICENSE("Dual BSD/GPL"); + +#define MY_MAJOR 42 +/* how many pages do we actually kmalloc */ +#define NPAGES 16 + +/* character device basic structure */ +static struct cdev mmap_cdev; + +/* pointer to kmalloc'd area */ +static void *kmalloc_ptr; + +/* pointer to the kmalloc'd area, rounded up to a page boundary */ +static char *kmalloc_area; + +static int my_open(struct inode *inode, struct file *filp) +{ + return 0; +} + +static int my_release(struct inode *inode, struct file *filp) +{ + return 0; +} + +static ssize_t my_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) +{ + /* TODO 2/2: check size doesn't exceed our mapped area size */ + if (size > NPAGES * PAGE_SIZE) + size = NPAGES * PAGE_SIZE; + + /* TODO 2/2: copy from mapped area to user buffer */ + if (copy_to_user(user_buffer, kmalloc_area, size)) + return -EFAULT; + + return size; +} + +static ssize_t my_write(struct file *file, const char __user *user_buffer, + size_t size, loff_t *offset) +{ + /* TODO 2/2: check size doesn't exceed our mapped area size */ + if (size > NPAGES * PAGE_SIZE) + size = NPAGES * PAGE_SIZE; + + /* TODO 2/3: copy 
from user buffer to mapped area */ + memset(kmalloc_area, 0, NPAGES * PAGE_SIZE); + if (copy_from_user(kmalloc_area, user_buffer, size)) + return -EFAULT; + + return size; +} + +static int my_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int ret; + long length = vma->vm_end - vma->vm_start; + + /* do not map more than we can */ + if (length > NPAGES * PAGE_SIZE) + return -EIO; + + /* TODO 1/7: map the whole physically contiguous area in one piece */ + ret = remap_pfn_range(vma, vma->vm_start, + virt_to_phys((void *)kmalloc_area) >> PAGE_SHIFT, + length, vma->vm_page_prot); + if (ret < 0) { + pr_err("could not map address area\n"); + return ret; + } + + return 0; +} + +static const struct file_operations mmap_fops = { + .owner = THIS_MODULE, + .open = my_open, + .release = my_release, + .mmap = my_mmap, + .read = my_read, + .write = my_write +}; + +static int my_seq_show(struct seq_file *seq, void *v) +{ + struct mm_struct *mm; + struct vm_area_struct *vma_iterator; + unsigned long total = 0; + + /* TODO 3: Get current process' mm_struct */ + mm = get_task_mm(current); + + /* TODO 3/6: Iterate through all memory mappings */ + vma_iterator = mm->mmap; + while (vma_iterator != NULL) { + pr_info("%lx %lx\n", vma_iterator->vm_start, vma_iterator->vm_end); + total += vma_iterator->vm_end - vma_iterator->vm_start; + vma_iterator = vma_iterator->vm_next; + } + + /* TODO 3: Release mm_struct */ + mmput(mm); + + /* TODO 3: write the total count to file */ + seq_printf(seq, "%lu %s\n", total, current->comm); + return 0; +} + +static int my_seq_open(struct inode *inode, struct file *file) +{ + /* TODO 3: Register the display function */ + return single_open(file, my_seq_show, NULL); +} + +static const struct proc_ops my_proc_ops = { + .proc_open = my_seq_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static int __init my_init(void) +{ + int ret = 0; + int i; + /* TODO 3/7: create a new entry in procfs */ + struct 
proc_dir_entry *entry; + + entry = proc_create(PROC_ENTRY_NAME, 0, NULL, &my_proc_ops); + if (!entry) { + ret = -ENOMEM; + goto out; + } + + ret = register_chrdev_region(MKDEV(MY_MAJOR, 0), 1, "mymap"); + if (ret < 0) { + pr_err("could not register region\n"); + goto out_no_chrdev; + } + + /* TODO 1/6: allocate NPAGES+2 pages using kmalloc */ + kmalloc_ptr = kmalloc((NPAGES + 2) * PAGE_SIZE, GFP_KERNEL); + if (kmalloc_ptr == NULL) { + ret = -ENOMEM; + pr_err("could not allocate memory\n"); + goto out_unreg; + } + + /* TODO 1: round kmalloc_ptr to nearest page start address */ + kmalloc_area = (char *) PAGE_ALIGN(((unsigned long)kmalloc_ptr)); + + /* TODO 1/2: mark pages as reserved */ + for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE) + SetPageReserved(virt_to_page(((unsigned long)kmalloc_area)+i)); + + /* TODO 1/6: write data in each page */ + for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE) { + kmalloc_area[i] = 0xaa; + kmalloc_area[i + 1] = 0xbb; + kmalloc_area[i + 2] = 0xcc; + kmalloc_area[i + 3] = 0xdd; + } + + /* Init device. */ + cdev_init(&mmap_cdev, &mmap_fops); + ret = cdev_add(&mmap_cdev, MKDEV(MY_MAJOR, 0), 1); + if (ret < 0) { + pr_err("could not add device\n"); + goto out_kfree; + } + + return 0; + +out_kfree: + kfree(kmalloc_ptr); +out_unreg: + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1); +out_no_chrdev: + remove_proc_entry(PROC_ENTRY_NAME, NULL); +out: + return ret; +} + +static void __exit my_exit(void) +{ + int i; + + cdev_del(&mmap_cdev); + + /* TODO 1/3: clear reservation on pages and free mem. 
*/ + for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE) + ClearPageReserved(virt_to_page(((unsigned long)kmalloc_area)+i)); + kfree(kmalloc_ptr); + + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1); + /* TODO 3: remove proc entry */ + remove_proc_entry(PROC_ENTRY_NAME, NULL); +} + +module_init(my_init); +module_exit(my_exit); diff --git a/tools/labs/templates/memory_mapping/test/.gitignore b/tools/labs/templates/memory_mapping/test/.gitignore new file mode 100644 index 00000000000000..2a2fb151cce3b2 --- /dev/null +++ b/tools/labs/templates/memory_mapping/test/.gitignore @@ -0,0 +1 @@ +/mmap-test diff --git a/tools/labs/templates/memory_mapping/test/Makefile b/tools/labs/templates/memory_mapping/test/Makefile new file mode 100644 index 00000000000000..8639d2a7d45f53 --- /dev/null +++ b/tools/labs/templates/memory_mapping/test/Makefile @@ -0,0 +1,9 @@ +CFLAGS=-Wall -m32 +LDFLAGS=-static -m32 + +mmap-test: mmap-test.o + +.PHONY: clean + +clean: + -rm -f *~ *.o mmap-test diff --git a/tools/labs/templates/memory_mapping/test/mmap-test.c b/tools/labs/templates/memory_mapping/test/mmap-test.c new file mode 100644 index 00000000000000..a4aa5669149d1e --- /dev/null +++ b/tools/labs/templates/memory_mapping/test/mmap-test.c @@ -0,0 +1,173 @@ +/* + * PSO - Memory Mapping Lab (#11) + * + * Exercise #1, #2: memory mapping between user-space and kernel-space + * + * test case + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mmap-test.h" + +#define NPAGES 16 +#define MMAP_DEV "/dev/mymmap" +#define PROC_ENTRY_PATH "/proc/" PROC_ENTRY_NAME + +void test_contents(unsigned char *addr, + unsigned char value1, unsigned char value2, + unsigned char value3, unsigned char value4) +{ + int i; + + for (i = 0; i < NPAGES * getpagesize(); i += getpagesize()) { + if (addr[i] != value1 || addr[i + 1] != value2 || + addr[i + 2] != value3 || addr[i + 3] != value4) + printf("0x%x 0x%x 0x%x 0x%x\n", addr[i], addr[i+1], + addr[i+2], 
addr[i+3]); + else + printf("matched\n"); + } +} + +int test_read_write(int fd, unsigned char *mmap_addr) +{ + unsigned char *local_addr; + int len = NPAGES * getpagesize(), i; + + printf("\nWrite test ...\n"); + /* alloc local memory */ + local_addr = malloc(len); + if (!local_addr) + return -1; + + /* init local memory */ + memset(local_addr, 0, len); + for (i = 0; i < NPAGES * getpagesize(); i += getpagesize()) { + local_addr[i] = 0xa0; + local_addr[i+1] = 0xb0; + local_addr[i+2] = 0xc0; + local_addr[i+3] = 0xd0; + } + + /* write to device */ + write(fd, local_addr, len); + + /* are these values in mapped memory? */ + test_contents(mmap_addr, 0xa0, 0xb0, 0xc0, 0xd0); + + printf("\nRead test ...\n"); + memset(local_addr, 0, len); + /* read from device */ + read(fd, local_addr, len); + /* are the values read correct? */ + test_contents(local_addr, 0xa0, 0xb0, 0xc0, 0xd0); + return 0; +} + +static int show_mem_usage(void) +{ + int fd, ret; + char buf[40]; + unsigned long mem_usage = 0; /* stays 0 if the entry cannot be parsed */ + + fd = open(PROC_ENTRY_PATH, O_RDONLY); + if (fd < 0) { + perror("open " PROC_ENTRY_PATH); + ret = fd; + goto out; + } + + ret = read(fd, buf, sizeof buf - 1); /* keep one byte for the terminator */ + if (ret < 0) + goto no_read; + + buf[ret] = 0; /* terminate before parsing */ + sscanf(buf, "%lu", &mem_usage); + + printf("Memory usage: %lu\n", mem_usage); + + ret = mem_usage; +no_read: + close(fd); +out: + return ret; +} + +int main(int argc, const char **argv) +{ + int fd, test = 0; /* no argument: run only the basic mmap check */ + unsigned char *addr; + int len = NPAGES * getpagesize(); + int i; + unsigned long usage_before_mmap, usage_after_mmap; + + if (argc > 1) + test = atoi(argv[1]); + + assert(system("mknod " MMAP_DEV " c 42 0") == 0); + + fd = open(MMAP_DEV, O_RDWR | O_SYNC); + if (fd < 0) { + perror("open"); + assert(system("rm " MMAP_DEV) == 0); + exit(EXIT_FAILURE); + } + + addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + assert(system("rm " MMAP_DEV) == 0); + exit(EXIT_FAILURE); + } + + for (i = 0; i < NPAGES * getpagesize(); i += 
getpagesize()) { + if (addr[i] != 0xaa || addr[i + 1] != 0xbb || + addr[i + 2] != 0xcc || addr[i + 3] != 0xdd) + printf("0x%x 0x%x 0x%x 0x%x\n", addr[i], addr[i+1], + addr[i+2], addr[i+3]); + else + printf("matched\n"); + } + + + if (test >= 2 && test_read_write(fd, addr)) { + perror("read/write test"); + assert(system("rm " MMAP_DEV) == 0); + exit(EXIT_FAILURE); + } + + if (test >= 3) { + usage_before_mmap = show_mem_usage(); + if (usage_before_mmap < 0) + printf("failed to show memory usage\n"); + + #define SIZE (10 * 1024 * 1024) + addr = mmap(NULL, SIZE, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) + perror("mmap_"); + + usage_after_mmap = show_mem_usage(); + if (usage_after_mmap < 0) + printf("failed to show memory usage\n"); + printf("mmaped :%lu MB\n", + (usage_after_mmap - usage_before_mmap) >> 20); + + sleep(30); + + munmap(addr, SIZE); + } + + close(fd); + + assert(system("rm " MMAP_DEV) == 0); + + return 0; +} diff --git a/tools/labs/templates/memory_mapping/test/mmap-test.h b/tools/labs/templates/memory_mapping/test/mmap-test.h new file mode 100644 index 00000000000000..8d98f57e319240 --- /dev/null +++ b/tools/labs/templates/memory_mapping/test/mmap-test.h @@ -0,0 +1,6 @@ +#ifndef __SO2MMAP_H__ +#define __SO2MMAP_H__ 1 + +#define PROC_ENTRY_NAME "my-proc-entry" + +#endif diff --git a/tools/labs/templates/memory_mapping/vmmap/Kbuild b/tools/labs/templates/memory_mapping/vmmap/Kbuild new file mode 100644 index 00000000000000..eaf763a4a9d505 --- /dev/null +++ b/tools/labs/templates/memory_mapping/vmmap/Kbuild @@ -0,0 +1,3 @@ +ccflags-y = -Wno-unused-function -Wno-unused-label -Wno-unused-variable + +obj-m := vmmap.o diff --git a/tools/labs/templates/memory_mapping/vmmap/vmmap.c b/tools/labs/templates/memory_mapping/vmmap/vmmap.c new file mode 100644 index 00000000000000..d013edcf33ee90 --- /dev/null +++ b/tools/labs/templates/memory_mapping/vmmap/vmmap.c @@ -0,0 +1,226 @@ +/* + * PSO - Memory Mapping Lab(#11) + * + * Exercise #2: 
memory mapping using vmalloc'd kernel areas + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../test/mmap-test.h" + + +MODULE_DESCRIPTION("simple mmap driver"); +MODULE_AUTHOR("PSO"); +MODULE_LICENSE("Dual BSD/GPL"); + +#define MY_MAJOR 42 + +/* how many pages do we actually vmalloc */ +#define NPAGES 16 + +/* character device basic structure */ +static struct cdev mmap_cdev; + +/* pointer to the vmalloc'd area, rounded up to a page boundary */ +static char *vmalloc_area; + +static int my_open(struct inode *inode, struct file *filp) +{ + return 0; +} + +static int my_release(struct inode *inode, struct file *filp) +{ + return 0; +} + +static ssize_t my_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) +{ + /* TODO 2/2: check size doesn't exceed our mapped area size */ + if (size > NPAGES * PAGE_SIZE) + size = NPAGES * PAGE_SIZE; + + /* TODO 2/2: copy from mapped area to user buffer */ + if (copy_to_user(user_buffer, vmalloc_area, size)) + return -EFAULT; + + return size; +} + +static ssize_t my_write(struct file *file, const char __user *user_buffer, + size_t size, loff_t *offset) +{ + /* TODO 2/2: check size doesn't exceed our mapped area size */ + if (size > NPAGES * PAGE_SIZE) + size = NPAGES * PAGE_SIZE; + + /* TODO 2/3: copy from user buffer to mapped area */ + memset(vmalloc_area, 0, NPAGES * PAGE_SIZE); + if (copy_from_user(vmalloc_area, user_buffer, size)) + return -EFAULT; + + return size; +} + +static int my_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int ret; + long length = vma->vm_end - vma->vm_start; + unsigned long start = vma->vm_start; + char *vmalloc_area_ptr = vmalloc_area; + unsigned long pfn; + + if (length > NPAGES * PAGE_SIZE) + return -EIO; + + /* TODO 1/9: map pages individually */ + while (length > 0) { + pfn = vmalloc_to_pfn(vmalloc_area_ptr); + ret = remap_pfn_range(vma, start, pfn, 
PAGE_SIZE, vma->vm_page_prot); + if (ret < 0) + return ret; + start += PAGE_SIZE; + vmalloc_area_ptr += PAGE_SIZE; + length -= PAGE_SIZE; + } + + return 0; +} + +static const struct file_operations mmap_fops = { + .owner = THIS_MODULE, + .open = my_open, + .release = my_release, + .mmap = my_mmap, + .read = my_read, + .write = my_write +}; + +static int my_seq_show(struct seq_file *seq, void *v) +{ + struct mm_struct *mm; + struct vm_area_struct *vma_iterator; + unsigned long total = 0; + + /* TODO 3: Get current process' mm_struct */ + mm = get_task_mm(current); + + /* TODO 3/6: Iterate through all memory mappings and print ranges */ + vma_iterator = mm->mmap; + while (vma_iterator != NULL) { + pr_info("%lx %lx\n", vma_iterator->vm_start, vma_iterator->vm_end); + total += vma_iterator->vm_end - vma_iterator->vm_start; + vma_iterator = vma_iterator->vm_next; + } + + /* TODO 3: Release mm_struct */ + mmput(mm); + + /* TODO 3: write the total count to file */ + seq_printf(seq, "%lu %s\n", total, current->comm); + return 0; +} + +static int my_seq_open(struct inode *inode, struct file *file) +{ + /* TODO 3: Register the display function */ + return single_open(file, my_seq_show, NULL); +} + +static const struct proc_ops my_proc_ops = { + .proc_open = my_seq_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static int __init my_init(void) +{ + int ret = 0; + int i; + /* TODO 3/7: create a new entry in procfs */ + struct proc_dir_entry *entry; + + entry = proc_create(PROC_ENTRY_NAME, 0, NULL, &my_proc_ops); + if (!entry) { + ret = -ENOMEM; + goto out; + } + + ret = register_chrdev_region(MKDEV(MY_MAJOR, 0), 1, "mymap"); + if (ret < 0) { + pr_err("could not register region\n"); + goto out_no_chrdev; + } + + /* TODO 1/6: allocate NPAGES using vmalloc */ + vmalloc_area = (char *)vmalloc(NPAGES * PAGE_SIZE); + if (vmalloc_area == NULL) { + ret = -ENOMEM; + pr_err("could not allocate memory\n"); + goto out_unreg; + } + + /* TODO 
1/2: mark pages as reserved */ + for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE) + SetPageReserved(vmalloc_to_page(vmalloc_area+i)); + + /* TODO 1/6: write data in each page */ + for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE) { + vmalloc_area[i] = 0xaa; + vmalloc_area[i + 1] = 0xbb; + vmalloc_area[i + 2] = 0xcc; + vmalloc_area[i + 3] = 0xdd; + } + + cdev_init(&mmap_cdev, &mmap_fops); + ret = cdev_add(&mmap_cdev, MKDEV(MY_MAJOR, 0), 1); + if (ret < 0) { + pr_err("could not add device\n"); + goto out_vfree; + } + + return 0; + +out_vfree: + vfree(vmalloc_area); +out_unreg: + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1); +out_no_chrdev: + remove_proc_entry(PROC_ENTRY_NAME, NULL); +out: + return ret; +} + +static void __exit my_exit(void) +{ + int i; + + cdev_del(&mmap_cdev); + + /* TODO 1/3: clear reservation on pages and free mem.*/ + for (i = 0; i < NPAGES * PAGE_SIZE; i += PAGE_SIZE) + ClearPageReserved(vmalloc_to_page(vmalloc_area+i)); + vfree(vmalloc_area); + + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1); + /* TODO 3: remove proc entry */ + remove_proc_entry(PROC_ENTRY_NAME, NULL); +} + +module_init(my_init); +module_exit(my_exit); diff --git a/tools/labs/templates/networking/1-2-netfilter/kernel/Kbuild b/tools/labs/templates/networking/1-2-netfilter/kernel/Kbuild new file mode 100644 index 00000000000000..8d831f886a624c --- /dev/null +++ b/tools/labs/templates/networking/1-2-netfilter/kernel/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -g + +obj-m = filter.o diff --git a/tools/labs/templates/networking/1-2-netfilter/kernel/filter.c b/tools/labs/templates/networking/1-2-netfilter/kernel/filter.c new file mode 100644 index 00000000000000..91850db9aad3c5 --- /dev/null +++ b/tools/labs/templates/networking/1-2-netfilter/kernel/filter.c @@ -0,0 +1,163 @@ +/* + * SO2 - Networking Lab (#10) + * + * Exercise #1, #2: simple netfilter module + * + * Code skeleton. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "filter.h" + +MODULE_DESCRIPTION("Simple netfilter module"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +#define LOG_LEVEL KERN_ALERT +#define MY_DEVICE "filter" + +static struct cdev my_cdev; +static atomic_t ioctl_set; +static unsigned int ioctl_set_addr; + + +/* Test ioctl_set_addr if it has been set. + */ +static int test_daddr(unsigned int dst_addr) +{ + int ret = 0; + + /* TODO 2/4: return non-zero if address has been set + * *and* matches dst_addr + */ + if (atomic_read(&ioctl_set) == 1) + ret = (ioctl_set_addr == dst_addr); + else + ret = 1; + + return ret; +} + +/* TODO 1/20: netfilter hook function */ +static unsigned int my_nf_hookfn(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + /* get IP header */ + struct iphdr *iph = ip_hdr(skb); + + if (iph->protocol == IPPROTO_TCP && test_daddr(iph->daddr)) { + /* get TCP header */ + struct tcphdr *tcph = tcp_hdr(skb); + /* test for connection initiating packet */ + if (tcph->syn && !tcph->ack) + printk(LOG_LEVEL "TCP connection initiated from " + "%pI4:%u\n", + &iph->saddr, ntohs(tcph->source)); + } + + /* let the package pass */ + return NF_ACCEPT; +} + +static int my_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int my_close(struct inode *inode, struct file *file) +{ + return 0; +} + +static long my_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case MY_IOCTL_FILTER_ADDRESS: + /* TODO 2/4: set filter address from arg */ + if (copy_from_user(&ioctl_set_addr, (void *) arg, + sizeof(ioctl_set_addr))) + return -EFAULT; + atomic_set(&ioctl_set, 1); + break; + + default: + return -ENOTTY; + } + + return 0; +} + +static const struct file_operations my_fops = { + .owner = THIS_MODULE, + .open = my_open, + .release = my_close, + .unlocked_ioctl = my_ioctl 
+}; + +/* TODO 1/6: define netfilter hook operations structure */ +static struct nf_hook_ops my_nfho = { + .hook = my_nf_hookfn, + .hooknum = NF_INET_LOCAL_OUT, + .pf = PF_INET, + .priority = NF_IP_PRI_FIRST +}; + +int __init my_hook_init(void) +{ + int err; + + /* register filter device */ + err = register_chrdev_region(MKDEV(MY_MAJOR, 0), 1, MY_DEVICE); + if (err != 0) + return err; + + atomic_set(&ioctl_set, 0); + ioctl_set_addr = 0; + + /* init & add device */ + cdev_init(&my_cdev, &my_fops); + cdev_add(&my_cdev, MKDEV(MY_MAJOR, 0), 1); + + /* TODO 1/3: register netfilter hook */ + err = nf_register_net_hook(&init_net, &my_nfho); + if (err) + goto out; + + return 0; + +out: + /* cleanup */ + cdev_del(&my_cdev); + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1); + + return err; +} + +void __exit my_hook_exit(void) +{ + /* TODO 1/1: unregister hook */ + nf_unregister_net_hook(&init_net, &my_nfho); + + /* cleanup device */ + cdev_del(&my_cdev); + unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1); +} + +module_init(my_hook_init); +module_exit(my_hook_exit); diff --git a/tools/labs/templates/networking/1-2-netfilter/kernel/filter.h b/tools/labs/templates/networking/1-2-netfilter/kernel/filter.h new file mode 100644 index 00000000000000..ad2f73c9fd000a --- /dev/null +++ b/tools/labs/templates/networking/1-2-netfilter/kernel/filter.h @@ -0,0 +1,11 @@ +#ifndef _FILTER_H_ +#define _FILTER_H_ + +#include + +/* ioctl command to pass address to filter driver */ +#define MY_IOCTL_FILTER_ADDRESS _IOW('k', 1, unsigned int) + +#define MY_MAJOR 42 + +#endif /* _FILTER_H_ */ diff --git a/tools/labs/templates/networking/1-2-netfilter/user/.gitignore b/tools/labs/templates/networking/1-2-netfilter/user/.gitignore new file mode 100644 index 00000000000000..ee4c92682341e4 --- /dev/null +++ b/tools/labs/templates/networking/1-2-netfilter/user/.gitignore @@ -0,0 +1 @@ +/test diff --git a/tools/labs/templates/networking/1-2-netfilter/user/Makefile 
b/tools/labs/templates/networking/1-2-netfilter/user/Makefile new file mode 100644 index 00000000000000..0d5af50006725c --- /dev/null +++ b/tools/labs/templates/networking/1-2-netfilter/user/Makefile @@ -0,0 +1,16 @@ +# +# SO2 - Networking Lab (#10) +# +# Makefile for test filter module +# + +CFLAGS = -Wall -static -m32 + +all: test + +test: test.c + +.PHONY: clean + +clean: + -rm -f test *~ *.o diff --git a/tools/labs/templates/networking/1-2-netfilter/user/test-1.sh b/tools/labs/templates/networking/1-2-netfilter/user/test-1.sh new file mode 100755 index 00000000000000..d78c482564c7ee --- /dev/null +++ b/tools/labs/templates/networking/1-2-netfilter/user/test-1.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# +# SO2 - Networking Lab (#10) +# +# Test script for exercise #1 +# + +# insert module +insmod ../kernel/filter.ko || exit 1 + +# listen for connections on localhost, port 60000 (run in background) +../../netcat -l -p 60000 & + +# wait for netcat to start listening +sleep 1 + +# connect to localhost, port 60000, starting a connection using local +# port number 600001; +echo "Should show up in filter." | ../../netcat -q 2 127.0.0.1 60000 + +# look for filter message in dmesg output +echo "Check dmesg output." 
+ +# remove module +rmmod filter || exit 1 diff --git a/tools/labs/templates/networking/1-2-netfilter/user/test-2.sh b/tools/labs/templates/networking/1-2-netfilter/user/test-2.sh new file mode 100755 index 00000000000000..37d07cedb74ace --- /dev/null +++ b/tools/labs/templates/networking/1-2-netfilter/user/test-2.sh @@ -0,0 +1,41 @@ +#!/bin/sh +# +# SO2 - Networking Lab (#10) +# +# Test script for exercise #2 +# + +# insert module +insmod ../kernel/filter.ko || exit 1 + +# set filter IP address to 127.0.0.1 +./test 127.0.0.1 + +# listen for connections on localhost, port 60000 (run in background) +../../netcat -l -p 60000 & + +# wait for netcat to start listening +sleep 1 + +# connect to localhost, port 60000, starting a connection using local +# port number 600001; +echo "Should show up in filter." | ../../netcat -q 2 127.0.0.1 60000 + +# set filter IP address to 127.0.0.2 +./test 127.0.0.2 + +# listen for connections on localhost, port 60000 (run in background) +../../netcat -l -p 60000 & + +# wait for netcat to start listening +sleep 1 + +# connect to localhost, port 60000, starting a connection using local +# port number 600001; +echo "Should NOT show up in filter." | ../../netcat -q 2 127.0.0.1 60000 + +# look for filter message in dmesg output +echo "Check dmesg output." + +# remove module +rmmod filter || exit 1 diff --git a/tools/labs/templates/networking/1-2-netfilter/user/test.c b/tools/labs/templates/networking/1-2-netfilter/user/test.c new file mode 100644 index 00000000000000..775edb458286c7 --- /dev/null +++ b/tools/labs/templates/networking/1-2-netfilter/user/test.c @@ -0,0 +1,73 @@ +/* + * SO2 - Networking Lab (#11) + * + * Test filter module for exercise #2 + * + * Sends MY_IOCTL_FILTER_ADDRESS to filter module. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kernel/filter.h" + +#define MY_DEVICE "/dev/filter" + + +static void print_usage(char *argv0) +{ + fprintf(stderr, "Usage: %s
\n" + "\taddress must be a string containing " + "an IP dotted address\n", argv0); +} + +int main(int argc, char **argv) +{ + int fd; + unsigned int addr; + + if (argc != 2) { + print_usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* get address */ + addr = inet_addr(argv[1]); + + /* make device node */ + if (mknod(MY_DEVICE, 0644 | S_IFCHR, makedev(MY_MAJOR, 0)) < 0) { + if (errno != EEXIST) { + perror("mknod " MY_DEVICE); + exit(EXIT_FAILURE); + } + } + + /* open device */ + fd = open(MY_DEVICE, O_RDONLY); + if (fd < 0) { + perror("open " MY_DEVICE); + } else { + /* send ioctl */ + if (ioctl(fd, MY_IOCTL_FILTER_ADDRESS, &addr) < 0) + perror("ioctl MY_IOCTL_FILTER_ADDRESS"); + + /* close device */ + if (close(fd) < 0) + perror("close"); + } + + /* cleanup device node */ + if (unlink(MY_DEVICE) < 0) + perror("unlink " MY_DEVICE); + + return 0; +} diff --git a/tools/labs/templates/networking/3-4-tcp-sock/Kbuild b/tools/labs/templates/networking/3-4-tcp-sock/Kbuild new file mode 100644 index 00000000000000..fa55ec98e71d78 --- /dev/null +++ b/tools/labs/templates/networking/3-4-tcp-sock/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g + +obj-m = tcp_sock.o diff --git a/tools/labs/templates/networking/3-4-tcp-sock/tcp_sock.c b/tools/labs/templates/networking/3-4-tcp-sock/tcp_sock.c new file mode 100644 index 00000000000000..d3ea78a1a3e496 --- /dev/null +++ b/tools/labs/templates/networking/3-4-tcp-sock/tcp_sock.c @@ -0,0 +1,129 @@ +/* + * SO2 - Networking Lab (#10) + * + * Exercise #3, #4: simple kernel TCP socket + * + * Code skeleton. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple kernel TCP socket"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +#define LOG_LEVEL KERN_ALERT +#define MY_TCP_PORT 60000 +#define LISTEN_BACKLOG 5 + +#define ON 1 +#define OFF 0 +#define DEBUG ON + +#if DEBUG == ON +#define LOG(s) \ + do { \ + printk(KERN_DEBUG s "\n"); \ + } while (0) +#else +#define LOG(s) \ + do {} while (0) +#endif + +#define print_sock_address(addr) \ + do { \ + printk(LOG_LEVEL "connection established to " \ + "%pI4:%d\n", \ + &addr.sin_addr.s_addr, \ + ntohs(addr.sin_port)); \ + } while (0) + +static struct socket *sock; /* listening (server) socket */ +static struct socket *new_sock; /* communication socket */ + +int __init my_tcp_sock_init(void) +{ + int err; + /* address to bind on */ + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons(MY_TCP_PORT), + .sin_addr = { htonl(INADDR_LOOPBACK) } + }; + int addrlen = sizeof(addr); + /* address of peer */ + struct sockaddr_in raddr; + + /* TODO 1/5: create listening socket */ + err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + printk(LOG_LEVEL "can't create socket\n"); + goto out; + } + + /* TODO 1/5: bind socket to loopback on port MY_TCP_PORT */ + err = sock->ops->bind(sock, (struct sockaddr *) &addr, addrlen); + if (err < 0) { + printk(LOG_LEVEL "can't bind socket\n"); + goto out_release; + } + + /* TODO 1/5: start listening */ + err = sock->ops->listen(sock, LISTEN_BACKLOG); + if (err < 0) { + printk(LOG_LEVEL "can't listen on socket\n"); + goto out_release; + } + + /* TODO 2/6: create new socket for the accepted connection */ + err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &new_sock); + if (err < 0) { + printk(LOG_LEVEL "can't create new socket\n"); + goto out_release; /* the listening socket already exists: release it */ + } + new_sock->ops = sock->ops; + + /* TODO 2/5: accept a connection */ + err = sock->ops->accept(sock, new_sock, 0, true); + if (err < 0) { 
+ printk(LOG_LEVEL "can't accept new connection\n"); + goto out_release_new_sock; + } + + /* TODO 2/6: get the address of the peer and print it */ + err = sock->ops->getname(new_sock, (struct sockaddr *) &raddr, 1); + if (err < 0) { + printk(LOG_LEVEL "can't find peer name\n"); + goto out_release_new_sock; + } + print_sock_address(raddr); + + return 0; + +out_release_new_sock: + /* TODO 2/1: cleanup socket for accepted connection */ + sock_release(new_sock); +out_release: + /* TODO 1/1: cleanup listening socket */ + sock_release(sock); +out: + return err; +} + +void __exit my_tcp_sock_exit(void) +{ + /* TODO 2/1: cleanup socket for accepted connection */ + sock_release(new_sock); + + /* TODO 1/1: cleanup listening socket */ + sock_release(sock); +} + +module_init(my_tcp_sock_init); +module_exit(my_tcp_sock_exit); diff --git a/tools/labs/templates/networking/3-4-tcp-sock/test-3.sh b/tools/labs/templates/networking/3-4-tcp-sock/test-3.sh new file mode 100755 index 00000000000000..b3289dbbe584ef --- /dev/null +++ b/tools/labs/templates/networking/3-4-tcp-sock/test-3.sh @@ -0,0 +1,18 @@ +#!/bin/sh +# +# SO2 - Networking Lab (#10) +# +# Test script for exercise #3 +# + +set -x + +# insert module +insmod tcp_sock.ko || exit 1 + +# list all currently listening servers and active connections +# for both TCP and UDP, and don't resolve hostnames +netstat -tuan + +# remove module +rmmod tcp_sock || exit 1 diff --git a/tools/labs/templates/networking/3-4-tcp-sock/test-4.sh b/tools/labs/templates/networking/3-4-tcp-sock/test-4.sh new file mode 100755 index 00000000000000..345d85356070f4 --- /dev/null +++ b/tools/labs/templates/networking/3-4-tcp-sock/test-4.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# +# SO2 - Networking Lab (#10) +# +# Test script for exercise #3 +# + +set -x + +# insert module (run in background, it waits for a connection) +insmod tcp_sock.ko & + +# wait for module to start listening +sleep 1 + +# list all currently listening servers and active connections +# for both 
TCP and UDP, and don't resolve hostnames +netstat -tuan + +# connect to localhost, port 60000, starting a connection using local +# port number 60001; +echo "Should connect." | ../netcat -q 4 127.0.0.1 60000 -p 60001 & + +# wait for connection to be established then remove module +# (and close connection) +sleep 3 + +# remove module +rmmod tcp_sock || exit 1 diff --git a/tools/labs/templates/networking/5-udp-sock/Kbuild b/tools/labs/templates/networking/5-udp-sock/Kbuild new file mode 100644 index 00000000000000..e42a05b84ca634 --- /dev/null +++ b/tools/labs/templates/networking/5-udp-sock/Kbuild @@ -0,0 +1,3 @@ +EXTRA_CFLAGS = -Wall -g + +obj-m = udp_sock.o diff --git a/tools/labs/templates/networking/5-udp-sock/test-5.sh b/tools/labs/templates/networking/5-udp-sock/test-5.sh new file mode 100755 index 00000000000000..9db69a99254e18 --- /dev/null +++ b/tools/labs/templates/networking/5-udp-sock/test-5.sh @@ -0,0 +1,26 @@ +#!/bin/sh +# +# SO2 - Networking Lab (#10) +# +# Test script for bonus exercise +# + +set -x + +# listen for UDP packets on localhost, port 60001 (run in background) +../netcat -l -u -p 60001 & + +# get pid of netcat +pid=$! + +# wait for netcat to start listening +sleep 1 + +# insert module, causing the message to be sent +insmod udp_sock.ko + +# remove module +rmmod udp_sock + +# kill netcat +kill $pid 2>/dev/null diff --git a/tools/labs/templates/networking/5-udp-sock/udp_sock.c b/tools/labs/templates/networking/5-udp-sock/udp_sock.c new file mode 100644 index 00000000000000..0b08f56d973d05 --- /dev/null +++ b/tools/labs/templates/networking/5-udp-sock/udp_sock.c @@ -0,0 +1,130 @@ +/* + * SO2 - Networking Lab (#10) + * + * Bonus: simple kernel UDP socket + * + * Code skeleton. 
+ */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Simple kernel UDP socket"); +MODULE_AUTHOR("SO2"); +MODULE_LICENSE("GPL"); + +#define LOG_LEVEL KERN_ALERT +#define MY_UDP_LOCAL_PORT 60000 +#define MY_UDP_REMOTE_PORT 60001 +#define MY_TEST_MESSAGE "kernelsocket\n" + +#define ON 1 +#define OFF 0 +#define DEBUG ON + +#if DEBUG == ON +#define LOG(s) \ + do { \ + printk(KERN_DEBUG s "\n"); \ + } while (0) +#else +#define LOG(s) \ + do {} while (0) +#endif + +#define print_sock_address(addr) \ + do { \ + printk(LOG_LEVEL "connection established to " \ + "%pI4:%d\n", \ + &addr.sin_addr.s_addr, \ + ntohs(addr.sin_port)); \ + } while (0) + +static struct socket *sock; /* UDP server */ + +/* send datagram */ +static int my_udp_msgsend(struct socket *s) +{ + /* address to send to */ + struct sockaddr_in raddr = { + .sin_family = AF_INET, + .sin_port = htons(MY_UDP_REMOTE_PORT), + .sin_addr = { htonl(INADDR_LOOPBACK) } + }; + int raddrlen = sizeof(raddr); + /* message */ + struct msghdr msg = { }; /* zero every field not set explicitly below */ + struct iovec iov; + char *buffer = MY_TEST_MESSAGE; + int len = strlen(buffer) + 1; + + /* TODO 1/7: build message */ + iov.iov_base = buffer; + iov.iov_len = len; + msg.msg_flags = 0; + msg.msg_name = &raddr; + msg.msg_namelen = raddrlen; + msg.msg_control = NULL; + msg.msg_controllen = 0; + + /* TODO 1/1: send the message down the socket and return the + * error code. 
+ */ + return kernel_sendmsg(s, &msg, (struct kvec *) &iov, 1, len); + + return 0; +} + +int __init my_udp_sock_init(void) +{ + int err; + /* address to bind on */ + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons(MY_UDP_LOCAL_PORT), + .sin_addr = { htonl(INADDR_LOOPBACK) } + }; + int addrlen = sizeof(addr); + + /* TODO 1/5: create UDP socket */ + err = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { + printk(LOG_LEVEL "can't create socket\n"); + goto out; + } + + /* TODO 1/5: bind socket to loopback on port MY_UDP_LOCAL_PORT */ + err = sock->ops->bind(sock, (struct sockaddr *) &addr, addrlen); + if (err < 0) { + printk(LOG_LEVEL "can't bind socket\n"); + goto out_release; + } + + /* send message */ + err = my_udp_msgsend(sock); + if (err < 0) { + printk(LOG_LEVEL "can't send message\n"); + goto out_release; + } + + return 0; + +out_release: + /* TODO 1/1: release socket */ + sock_release(sock); +out: + return err; +} + +void __exit my_udp_sock_exit(void) +{ + /* TODO 1/1: release socket */ + sock_release(sock); +} + +module_init(my_udp_sock_init); +module_exit(my_udp_sock_exit); diff --git a/tools/labs/templates/networking/netcat b/tools/labs/templates/networking/netcat new file mode 100755 index 00000000000000..27bf43d64c2191 Binary files /dev/null and b/tools/labs/templates/networking/netcat differ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 28baee7ba1ca86..ad165e6e74bc0a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7649,6 +7649,16 @@ bool bpf_map__is_pinned(const struct bpf_map *map) return map->pinned; } +static void sanitize_pin_path(char *s) +{ + /* bpffs disallows periods in path names */ + while (*s) { + if (*s == '.') + *s = '_'; + s++; + } +} + int bpf_object__pin_maps(struct bpf_object *obj, const char *path) { struct bpf_map *map; @@ -7678,6 +7688,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) err = -ENAMETOOLONG; goto 
err_unpin_maps; } + sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { continue; @@ -7722,6 +7733,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) return -EINVAL; else if (len >= PATH_MAX) return -ENAMETOOLONG; + sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { continue; diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index cfcdbd7be066ea..17465d454a0e31 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -367,21 +367,13 @@ static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, boo return map; } -static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, - struct perf_evsel *evsel, int idx, int cpu, - int thread) +static void perf_evsel__set_sid_idx(struct perf_evsel *evsel, int idx, int cpu, int thread) { struct perf_sample_id *sid = SID(evsel, cpu, thread); sid->idx = idx; - if (evlist->cpus && cpu >= 0) - sid->cpu = evlist->cpus->map[cpu]; - else - sid->cpu = -1; - if (!evsel->system_wide && evlist->threads && thread >= 0) - sid->tid = perf_thread_map__pid(evlist->threads, thread); - else - sid->tid = -1; + sid->cpu = perf_cpu_map__cpu(evsel->cpus, cpu); + sid->tid = perf_thread_map__pid(evsel->threads, thread); } static struct perf_mmap* @@ -500,8 +492,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) return -1; - perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, - thread); + perf_evsel__set_sid_idx(evsel, idx, cpu, thread); } } diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index c8d45091e7c26a..c70e9e03af3e99 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_cpu_map__put(cpus); __T_END; - return 0; + return tests_failed == 0 ? 
0 : -1; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 6d8ebe0c25042f..bd19cabddaf62b 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -215,6 +215,7 @@ static int test_mmap_thread(void) sysfs__mountpoint()); if (filename__read_int(path, &id)) { + tests_failed++; fprintf(stderr, "error: failed to get tracepoint id: %s\n", path); return -1; } @@ -409,5 +410,5 @@ int main(int argc, char **argv) test_mmap_cpus(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 135722ac965bf7..0ad82d7a2a51b6 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -131,5 +131,5 @@ int main(int argc, char **argv) test_stat_thread_enable(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 7dc4d6fbeddee5..384471441b4842 100644 --- a/tools/lib/perf/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_thread_map__put(threads); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index 794a375dad3601..b2aec04fce8f67 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -50,15 +50,8 @@ static NORETURN inline void die(const char *err, ...) 
static inline void *xrealloc(void *ptr, size_t size) { void *ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) { - ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); - } + if (!ret) + die("Out of memory, realloc failed"); return ret; } diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 4ea9a833dde7aa..5cdb19036d7f71 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -3,15 +3,6 @@ include ../scripts/Makefile.include include ../scripts/Makefile.arch # always use the host compiler -ifneq ($(LLVM),) -HOSTAR ?= llvm-ar -HOSTCC ?= clang -HOSTLD ?= ld.lld -else -HOSTAR ?= ar -HOSTCC ?= gcc -HOSTLD ?= ld -endif AR = $(HOSTAR) CC = $(HOSTCC) LD = $(HOSTLD) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c6ab44543c92ae..956383d5fa62ea 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2921,14 +2921,10 @@ int check(struct objtool_file *file) warnings += ret; out: - if (ret < 0) { - /* - * Fatal error. The binary is corrupt or otherwise broken in - * some way, or objtool itself is broken. Fail the kernel - * build. - */ - return ret; - } - + /* + * For now, don't fail the kernel build on fatal warnings. These + * errors are still fairly common due to the growing matrix of + * supported toolchains and their recent pace of change. + */ return 0; } diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 4e1d7460574b4a..f4f3e8d995930a 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -354,8 +354,11 @@ static int read_symbols(struct elf *elf) symtab = find_section_by_name(elf, ".symtab"); if (!symtab) { - WARN("missing symbol table"); - return -1; + /* + * A missing symbol table is actually possible if it's an empty + * .o file. This can happen for thunk_64.o. 
+ */ + return 0; } symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); @@ -422,6 +425,13 @@ static int read_symbols(struct elf *elf) list_add(&sym->list, entry); elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); + + /* + * Don't store empty STT_NOTYPE symbols in the rbtree. They + * can exist within a function, confusing the sorting. + */ + if (!sym->len) + rb_erase(&sym->node, &sym->sec->symbol_tree); } if (stats) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 7ce3f2e8b9c74b..62f3deb1d3a8b7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -175,10 +175,6 @@ endef LD += $(EXTRA_LDFLAGS) -HOSTCC ?= gcc -HOSTLD ?= ld -HOSTAR ?= ar - PKG_CONFIG = $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index d5771e4d094f84..4c59f3ae438fc6 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -145,7 +145,7 @@ static int expand_libpfm_events(void) int ret; struct evlist *evlist; struct rblist metric_events; - const char event_str[] = "UNHALTED_CORE_CYCLES"; + const char event_str[] = "CYCLES"; struct option opt = { .value = &evlist, }; diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index d3517a74d95e38..31f987bb7ebba4 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -561,7 +561,7 @@ static int metric_parse_fake(const char *str) } } - if (expr__parse(&result, &ctx, str, 1)) + if (expr__parse(&result, &ctx, str, 0)) pr_err("expr__parse failed\n"); else ret = 0; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 15385ea00190fe..74bf480aa4f056 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2973,7 +2973,7 @@ int machines__for_each_thread(struct machines *machines, pid_t machine__get_current_tid(struct machine *machine, int 
cpu) { - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid) return -1; @@ -2985,7 +2985,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid) { struct thread *thread; - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0) return -EINVAL; diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index e687497b3aac0c..a4a100425b3a29 100644 --- a/tools/perf/util/parse-regs-options.c +++ b/tools/perf/util/parse-regs-options.c @@ -54,7 +54,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr) #endif fputc('\n', stderr); /* just printing available regs */ - return -1; + goto error; } #ifdef HAVE_PERF_REGS_SUPPORT for (r = sample_reg_masks; r->name; r++) { diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 064b63a6a3f311..bbecb449ea9443 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -791,7 +791,7 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, const char *sdtgrp) { struct strbuf buf; - char *ret = NULL, **args; + char *ret = NULL; int i, args_count, err; unsigned long long ref_ctr_offset; @@ -813,12 +813,19 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, goto out; if (note->args) { - args = argv_split(note->args, &args_count); + char **args = argv_split(note->args, &args_count); + + if (args == NULL) + goto error; for (i = 0; i < args_count; ++i) { - if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) + if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) { + argv_free(args); goto error; + } } + + argv_free(args); } out: diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 098080287c6876..22098fffac4f11 100644 --- a/tools/perf/util/session.c +++ 
b/tools/perf/util/session.c @@ -2397,7 +2397,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, { int i, err = -1; struct perf_cpu_map *map; - int nr_cpus = min(session->header.env.nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS); for (i = 0; i < PERF_TYPE_MAX; ++i) { struct evsel *evsel; diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index 54a2857c2510ae..331f6d30f47261 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -54,7 +54,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM} CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- CROSS_COMPILE ?= $(CROSS) LD = $(CC) -HOSTCC = gcc # check if compiler option is supported cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index cd089a50585940..ead9e51f75ada6 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1245,6 +1245,8 @@ static void dump_isst_config(int arg) isst_ctdp_display_information_end(outf); } +static void adjust_scaling_max_from_base_freq(int cpu); + static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, void *arg4) { @@ -1263,6 +1265,9 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, int pkg_id = get_physical_package_id(cpu); int die_id = get_physical_die_id(cpu); + /* Wait for updated base frequencies */ + usleep(2000); + fprintf(stderr, "Option is set to online/offline\n"); ctdp_level.core_cpumask_size = alloc_cpu_set(&ctdp_level.core_cpumask); @@ -1279,6 +1284,7 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) { fprintf(stderr, "online cpu %d\n", i); set_cpu_online_offline(i, 1); 
+ adjust_scaling_max_from_base_freq(i); } else { fprintf(stderr, "offline cpu %d\n", i); set_cpu_online_offline(i, 0); @@ -1436,6 +1442,31 @@ static int set_cpufreq_scaling_min_max(int cpu, int max, int freq) return 0; } +static int no_turbo(void) +{ + return parse_int_file(0, "/sys/devices/system/cpu/intel_pstate/no_turbo"); +} + +static void adjust_scaling_max_from_base_freq(int cpu) +{ + int base_freq, scaling_max_freq; + + scaling_max_freq = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu); + base_freq = get_cpufreq_base_freq(cpu); + if (scaling_max_freq < base_freq || no_turbo()) + set_cpufreq_scaling_min_max(cpu, 1, base_freq); +} + +static void adjust_scaling_min_from_base_freq(int cpu) +{ + int base_freq, scaling_min_freq; + + scaling_min_freq = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu); + base_freq = get_cpufreq_base_freq(cpu); + if (scaling_min_freq < base_freq) + set_cpufreq_scaling_min_max(cpu, 0, base_freq); +} + static int set_clx_pbf_cpufreq_scaling_min_max(int cpu) { struct isst_pkg_ctdp_level_info *ctdp_level; @@ -1533,6 +1564,7 @@ static void set_scaling_min_to_cpuinfo_max(int cpu) continue; set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 0); + adjust_scaling_min_from_base_freq(i); } } diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index a7974638561cab..1358e89cdf7d69 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -59,6 +59,16 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld) $(call allow-override,CXX,$(CROSS_COMPILE)g++) $(call allow-override,STRIP,$(CROSS_COMPILE)strip) +ifneq ($(LLVM),) +HOSTAR ?= llvm-ar +HOSTCC ?= clang +HOSTLD ?= ld.lld +else +HOSTAR ?= ar +HOSTCC ?= gcc +HOSTLD ?= ld +endif + ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 54188ee16c486c..4e245096451738 100755 --- 
a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1499,17 +1499,16 @@ sub dodie { my $log_file; if (defined($opt{"LOG_FILE"})) { - my $whence = 0; # beginning of file - my $pos = $test_log_start; + my $whence = 2; # End of file + my $log_size = tell LOG; + my $size = $log_size - $test_log_start; if (defined($mail_max_size)) { - my $log_size = tell LOG; - $log_size -= $test_log_start; - if ($log_size > $mail_max_size) { - $whence = 2; # end of file - $pos = - $mail_max_size; + if ($size > $mail_max_size) { + $size = $mail_max_size; } } + my $pos = - $size; $log_file = "$tmpdir/log"; open (L, "$opt{LOG_FILE}") or die "Can't open $opt{LOG_FILE} to read)"; open (O, "> $tmpdir/log") or die "Can't open $tmpdir/log\n"; @@ -4253,7 +4252,12 @@ sub do_send_mail { $mail_command =~ s/\$SUBJECT/$subject/g; $mail_command =~ s/\$MESSAGE/$message/g; - run_command $mail_command; + my $ret = run_command $mail_command; + if (!$ret && defined($file)) { + # try again without the file + $message .= "\n\n*** FAILED TO SEND LOG ***\n\n"; + do_send_email($subject, $message); + } } sub send_email { diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 542768f5195b72..9359377aeb35ca 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -146,6 +146,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +ifeq ($(VMLINUX_BTF),) +$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") +endif DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool @@ -220,7 +223,8 @@ $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ # build would have failed anyways. 
define get_sys_includes $(shell $(1) -v -E - &1 \ - | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) -dM -E - > 32; struct dummy_storage *storage; + int err; if (pid != monitored_pid) return 0; storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, - BPF_SK_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; - if (storage->value == DUMMY_STORAGE_VALUE) + if (storage->value != DUMMY_STORAGE_VALUE) inode_storage_result = -1; - inode_storage_result = - bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + if (!err) + inode_storage_result = err; return 0; } @@ -82,19 +84,23 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, { __u32 pid = bpf_get_current_pid_tgid() >> 32; struct dummy_storage *storage; + int err; if (pid != monitored_pid) return 0; storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, - BPF_SK_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; - if (storage->value == DUMMY_STORAGE_VALUE) + if (storage->value != DUMMY_STORAGE_VALUE) sk_storage_result = -1; - sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk); + err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); + if (!err) + sk_storage_result = err; + return 0; } @@ -109,7 +115,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, return 0; storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, - BPF_SK_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; @@ -131,7 +137,7 @@ int BPF_PROG(file_open, struct file *file) return 0; storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 
0; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 30982a7e4d0f78..4896fdf816f73a 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -256,6 +256,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); +#if __has_builtin(__builtin_preserve_enum_value) if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, pids_cgrp_id___local); @@ -275,6 +276,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, } } } +#endif cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs); cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs); diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index f48dbfe24ddc8b..a621b58ab079d5 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -528,12 +527,11 @@ int _ipip_set_tunnel(struct __sk_buff *skb) struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; struct iphdr *iph = data; - struct tcphdr *tcp = data + sizeof(*iph); void *data_end = (void *)(long)skb->data_end; int ret; /* single length check */ - if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + if (data + sizeof(*iph) > data_end) { ERROR(1); return TC_ACT_SHOT; } @@ -541,16 +539,6 @@ int _ipip_set_tunnel(struct __sk_buff *skb) key.tunnel_ttl = 64; if (iph->protocol == IPPROTO_ICMP) { key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - } else { - if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) - return TC_ACT_SHOT; - - if 
(tcp->dest == bpf_htons(5200)) - key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - else if (tcp->dest == bpf_htons(5201)) - key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */ - else - return TC_ACT_SHOT; } ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); @@ -585,19 +573,20 @@ int _ipip6_set_tunnel(struct __sk_buff *skb) struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; struct iphdr *iph = data; - struct tcphdr *tcp = data + sizeof(*iph); void *data_end = (void *)(long)skb->data_end; int ret; /* single length check */ - if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + if (data + sizeof(*iph) > data_end) { ERROR(1); return TC_ACT_SHOT; } __builtin_memset(&key, 0x0, sizeof(key)); - key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) { + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + } ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); @@ -634,35 +623,18 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb) struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; struct ipv6hdr *iph = data; - struct tcphdr *tcp = data + sizeof(*iph); void *data_end = (void *)(long)skb->data_end; int ret; /* single length check */ - if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + if (data + sizeof(*iph) > data_end) { ERROR(1); return TC_ACT_SHOT; } - key.remote_ipv6[0] = bpf_htonl(0x2401db00); key.tunnel_ttl = 64; - if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) { - key.remote_ipv6[3] = bpf_htonl(1); - } else { - if (iph->nexthdr != 6 /* NEXTHDR_TCP */) { - ERROR(iph->nexthdr); - return TC_ACT_SHOT; - } - - if (tcp->dest == bpf_htons(5200)) { - key.remote_ipv6[3] = bpf_htonl(1); - } else if (tcp->dest == bpf_htons(5201)) { - key.remote_ipv6[3] = bpf_htonl(2); - } else { - ERROR(tcp->dest); - return TC_ACT_SHOT; - } + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ } ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), diff --git 
a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 0fa1e421c3d7a1..427ca00a32177a 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1273,6 +1273,16 @@ static char *test_to_str(int test) return "unknown"; } +static void append_str(char *dst, const char *src, size_t dst_cap) +{ + size_t avail = dst_cap - strlen(dst); + + if (avail <= 1) /* just zero byte could be written */ + return; + + strncat(dst, src, avail - 1); /* strncat() adds + 1 for zero byte */ +} + #define OPTSTRING 60 static void test_options(char *options) { @@ -1281,42 +1291,42 @@ static void test_options(char *options) memset(options, 0, OPTSTRING); if (txmsg_pass) - strncat(options, "pass,", OPTSTRING); + append_str(options, "pass,", OPTSTRING); if (txmsg_redir) - strncat(options, "redir,", OPTSTRING); + append_str(options, "redir,", OPTSTRING); if (txmsg_drop) - strncat(options, "drop,", OPTSTRING); + append_str(options, "drop,", OPTSTRING); if (txmsg_apply) { snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply); - strncat(options, tstr, OPTSTRING); + append_str(options, tstr, OPTSTRING); } if (txmsg_cork) { snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork); - strncat(options, tstr, OPTSTRING); + append_str(options, tstr, OPTSTRING); } if (txmsg_start) { snprintf(tstr, OPTSTRING, "start %d,", txmsg_start); - strncat(options, tstr, OPTSTRING); + append_str(options, tstr, OPTSTRING); } if (txmsg_end) { snprintf(tstr, OPTSTRING, "end %d,", txmsg_end); - strncat(options, tstr, OPTSTRING); + append_str(options, tstr, OPTSTRING); } if (txmsg_start_pop) { snprintf(tstr, OPTSTRING, "pop (%d,%d),", txmsg_start_pop, txmsg_start_pop + txmsg_pop); - strncat(options, tstr, OPTSTRING); + append_str(options, tstr, OPTSTRING); } if (txmsg_ingress) - strncat(options, "ingress,", OPTSTRING); + append_str(options, "ingress,", OPTSTRING); if (txmsg_redir_skb) - strncat(options, "redir_skb,", OPTSTRING); + 
append_str(options, "redir_skb,", OPTSTRING); if (txmsg_ktls_skb) - strncat(options, "ktls_skb,", OPTSTRING); + append_str(options, "ktls_skb,", OPTSTRING); if (ktls) - strncat(options, "ktls,", OPTSTRING); + append_str(options, "ktls,", OPTSTRING); if (peek_flag) - strncat(options, "peek,", OPTSTRING); + append_str(options, "peek,", OPTSTRING); } static int __test_exec(int cgrp, int test, struct sockmap_options *opt) diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index bd12ec97a44df3..1ccbe804e8e1cb 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -24,12 +24,12 @@ # Root namespace with metadata-mode tunnel + BPF # Device names and addresses: # veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay) -# tunnel dev 11, ex: gre11, IPv4: 10.1.1.200 (overlay) +# tunnel dev 11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay) # # Namespace at_ns0 with native tunnel # Device names and addresses: # veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay) -# tunnel dev 00, ex: gre00, IPv4: 10.1.1.100 (overlay) +# tunnel dev 00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay) # # # End-to-end ping packet flow @@ -250,7 +250,7 @@ add_ipip_tunnel() ip addr add dev $DEV 10.1.1.200/24 } -add_ipip6tnl_tunnel() +add_ip6tnl_tunnel() { ip netns exec at_ns0 ip addr add ::11/96 dev veth0 ip netns exec at_ns0 ip link set dev veth0 up @@ -262,11 +262,13 @@ add_ipip6tnl_tunnel() ip link add dev $DEV_NS type $TYPE \ local ::11 remote ::22 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96 ip netns exec at_ns0 ip link set dev $DEV_NS up # root namespace ip link add dev $DEV type $TYPE external ip addr add dev $DEV 10.1.1.200/24 + ip addr add dev $DEV 1::22/96 ip link set dev $DEV up } @@ -534,7 +536,7 @@ test_ipip6() check $TYPE config_device - add_ipip6tnl_tunnel + add_ip6tnl_tunnel ip link set dev veth1 mtu 1500 attach_bpf 
$DEV ipip6_set_tunnel ipip6_get_tunnel # underlay @@ -553,6 +555,34 @@ test_ipip6() echo -e ${GREEN}"PASS: $TYPE"${NC} } +test_ip6ip6() +{ + TYPE=ip6tnl + DEV_NS=ip6ip6tnl00 + DEV=ip6ip6tnl11 + ret=0 + + check $TYPE + config_device + add_ip6tnl_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # ip6 over ip6 + ping6 $PING_ARG 1::11 + check_err $? + ip netns exec at_ns0 ping6 $PING_ARG 1::22 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: ip6$TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: ip6$TYPE"${NC} +} + setup_xfrm_tunnel() { auth=0x$(printf '1%.0s' {1..40}) @@ -646,6 +676,7 @@ cleanup() ip link del veth1 2> /dev/null ip link del ipip11 2> /dev/null ip link del ipip6tnl11 2> /dev/null + ip link del ip6ip6tnl11 2> /dev/null ip link del gretap11 2> /dev/null ip link del ip6gre11 2> /dev/null ip link del ip6gretap11 2> /dev/null @@ -742,6 +773,10 @@ bpf_tunnel_test() test_ipip6 errors=$(( $errors + $? )) + echo "Testing IP6IP6 tunnel..." + test_ip6ip6 + errors=$(( $errors + $? )) + echo "Testing IPSec tunnel..." test_xfrm_tunnel errors=$(( $errors + $? )) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh index 4d900bc1f76c6b..5c7700212f7537 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -230,7 +230,7 @@ switch_create() __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which # is (-2*MTU) about 80K of delay provision. 
- __mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null # bridges # ------- diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index eb693a3b7b4a19..4c7d33618437c6 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -869,7 +869,7 @@ ipv6_torture() pid3=$! ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 84205c3a55ebed..2b5707738609ef 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1055,7 +1055,6 @@ ipv6_addr_metric_test() check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260" log_test $? 0 "Set metric with peer route on local side" - log_test $? 0 "User specified metric on local address" check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260" log_test $? 
0 "Set metric with peer route on peer side" diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index cf3d26c233e8ec..7fcc42bc076fad 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -197,7 +197,7 @@ multipath4_test() t0_rp12=$(link_stats_tx_packets_get $rp12) t0_rp13=$(link_stats_tx_packets_get $rp13) - ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d 1msec -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 79a2099279621a..464821c587a5e8 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -178,7 +178,7 @@ multipath4_test() t0_rp12=$(link_stats_tx_packets_get $rp12) t0_rp13=$(link_stats_tx_packets_get $rp13) - ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d 1msec -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 6bbf69a28e128e..3367fb5f2feff5 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -162,7 +162,15 @@ # - list_flush_ipv6_exception # Using the same topology as in pmtu_ipv6, create exceptions, and check # they are shown when listing exception caches, gone after flushing them - +# +# - pmtu_ipv4_route_change +# Use the same topology as in pmtu_ipv4, but issue a route replacement +# command and delete the corresponding device afterward. This tests for +# proper cleanup of the PMTU exceptions by the route replacement path. 
+# Device unregistration should complete successfully +# +# - pmtu_ipv6_route_change +# Same as above but with IPv6 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -224,7 +232,9 @@ tests=" cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1 cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1 list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 - list_flush_ipv6_exception ipv6: list and flush cached exceptions 1" + list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 + pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" NS_A="ns-A" NS_B="ns-B" @@ -1770,6 +1780,63 @@ test_list_flush_ipv6_exception() { return ${fail} } +test_pmtu_ipvX_route_change() { + family=${1} + + setup namespaces routing || return 2 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + if [ ${family} -eq 4 ]; then + ping=ping + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + gw="${prefix4}.${a_r1}.2" + else + ping=${ping6} + dst1="${prefix6}:${b_r1}::1" + dst2="${prefix6}:${b_r2}::1" + gw="${prefix6}:${a_r1}::2" + fi + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Create route exceptions + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + 
check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 + + # Replace the route from A to R1 + run_cmd ${ns_a} ip route change default via ${gw} + + # Delete the device in A + run_cmd ${ns_a} ip link del "veth_A-R1" +} + +test_pmtu_ipv4_route_change() { + test_pmtu_ipvX_route_change 4 +} + +test_pmtu_ipv6_route_change() { + test_pmtu_ipvX_route_change 6 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index ac2a30be9b325a..f8a19f548ae9d5 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -5,6 +5,14 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +# set global exit status, but never reset nonzero one. +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -44,7 +52,9 @@ run_one() { # Hack: let bg programs complete the startup sleep 0.1 ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_test() { @@ -87,8 +97,10 @@ run_one_nat() { sleep 0.1 ./udpgso_bench_tx ${tx_args} + ret=$? kill -INT $pid wait $(jobs -p) + return $ret } run_one_2sock() { @@ -110,7 +122,9 @@ run_one_2sock() { sleep 0.1 # first UDP GSO socket should be closed at this point ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_nat_test() { @@ -131,36 +145,54 @@ run_all() { local -r core_args="-l 4" local -r ipv4_args="${core_args} -4 -D 192.168.1.1" local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + ret=0 echo "ipv4" run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400" + check_err $? # explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1) # when GRO does not take place run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1" + check_err $? 
# the GSO packets are aggregated because: # * veth schedule napi after each xmit # * segmentation happens in BH context, veth napi poll is delayed after # the transmission of the last segment run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + check_err $? run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? echo "ipv6" run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" + check_err $? run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1" + check_err $? run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520" + check_err $? run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452" + check_err $? run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" + check_err $? run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452" + check_err $? + return $ret } if [ ! -f ../bpf/xdp_dummy.o ]; then @@ -180,3 +212,5 @@ elif [[ $1 == "__subprocess_2sock" ]]; then shift run_one_2sock $@ fi + +exit $? 
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 7a1bf94c5bd38b..bdf450eaf60cff 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -202,7 +202,7 @@ check_xfrm() { # 1: iptables -m policy rule count != 0 rval=$1 ip=$2 - lret=0 + local lret=0 ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null @@ -287,6 +287,47 @@ check_hthresh_repeat() return 0 } +# insert non-overlapping policies in a random order and check that +# all of them can be fetched using the traffic selectors. +check_random_order() +{ + local ns=$1 + local log=$2 + + for i in $(seq 100); do + ip -net $ns xfrm policy flush + for j in $(seq 0 16 255 | sort -R); do + ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow + done + for j in $(seq 0 16 255); do + if ! ip -net $ns xfrm policy get dst $j.0.0.0/24 dir out > /dev/null; then + echo "FAIL: $log" 1>&2 + return 1 + fi + done + done + + for i in $(seq 100); do + ip -net $ns xfrm policy flush + for j in $(seq 0 16 255 | sort -R); do + local addr=$(printf "e000:0000:%02x00::/56" $j) + ip -net $ns xfrm policy add dst $addr dir out priority 10 action allow + done + for j in $(seq 0 16 255); do + local addr=$(printf "e000:0000:%02x00::/56" $j) + if ! 
ip -net $ns xfrm policy get dst $addr dir out > /dev/null; then + echo "FAIL: $log" 1>&2 + return 1 + fi + done + done + + ip -net $ns xfrm policy flush + + echo "PASS: $log" + return 0 +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" @@ -438,6 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal" check_hthresh_repeat "policies with repeated htresh change" +check_random_order ns3 "policies inserted in random order" + for i in 1 2 3 4;do ip netns del ns$i;done exit $ret diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh index edf0a48da6bf80..bf6b9626c7dd27 100755 --- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh +++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh @@ -94,7 +94,13 @@ check_for_helper() local message=$2 local port=$3 - ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' + if echo $message |grep -q 'ipv6';then + local family="ipv6" + else + local family="ipv4" + fi + + ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' if [ $? 
-ne 0 ] ; then echo "FAIL: ${netns} did not show attached helper $message" 1>&2 ret=1 @@ -111,8 +117,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ip $msg" $port check_for_helper "$ns2" "ip $msg" $port @@ -128,8 +134,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ipv6 $msg" $port check_for_helper "$ns2" "ipv6 $msg" $port diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index cb53a8b777e68a..c25cf7cd45e9fd 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -443,7 +443,6 @@ int test_alignment_handler_integer(void) LOAD_DFORM_TEST(ldu); LOAD_XFORM_TEST(ldx); LOAD_XFORM_TEST(ldux); - LOAD_DFORM_TEST(lmw); STORE_DFORM_TEST(stb); STORE_XFORM_TEST(stbx); STORE_DFORM_TEST(stbu); @@ -462,7 +461,11 @@ int test_alignment_handler_integer(void) STORE_XFORM_TEST(stdx); STORE_DFORM_TEST(stdu); STORE_XFORM_TEST(stdux); + +#ifdef __BIG_ENDIAN__ + LOAD_DFORM_TEST(lmw); STORE_DFORM_TEST(stmw); +#endif return rc; } diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c index 9e5c7f3f498a79..0af4f02669a115 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c +++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c @@ -290,5 +290,5 @@ static int test(void) int main(void) { - test_harness(test, "pkey_exec_prot"); + return test_harness(test, "pkey_exec_prot"); } diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c index 4f815d7c12145a..2db76e56d4cb99 100644 --- 
a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c +++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c @@ -329,5 +329,5 @@ static int test(void) int main(void) { - test_harness(test, "pkey_siginfo"); + return test_harness(test, "pkey_siginfo"); } diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 609a4ef9300e32..97165a83df6320 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -48,7 +48,7 @@ while true; do -l | --list) echo "$available" exit 0 ;; - -n | --dry-run) + -d | --dry-run) dryrun="echo" shift ;; -h | --help) diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config index 64c19d8eba795c..ad431a5178fbe5 100644 --- a/tools/testing/selftests/seccomp/config +++ b/tools/testing/selftests/seccomp/config @@ -1,3 +1,4 @@ +CONFIG_PID_NS=y CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y CONFIG_USER_NS=y diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 691893afc15d87..e63f3163270801 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests uname_M := $(shell uname -m 2>/dev/null || echo not) -MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') +MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') # Without this, failed build products remain, with up-to-date timestamps, # thus tricking Make (and you!) 
into believing that All Is Well, in subsequent @@ -39,7 +39,7 @@ TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd TEST_GEN_FILES += khugepaged -ifeq ($(ARCH),x86_64) +ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c) CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie) @@ -61,13 +61,13 @@ TEST_GEN_FILES += $(BINARIES_64) endif else -ifneq (,$(findstring $(ARCH),powerpc)) +ifneq (,$(findstring $(MACHINE),ppc64)) TEST_GEN_FILES += protection_keys endif endif -ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64)) +ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64)) TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs @@ -82,7 +82,7 @@ include ../lib.mk $(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread -ifeq ($(ARCH),x86_64) +ifeq ($(MACHINE),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2541a17ff1c453..cf9cc0ed7e9955 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -482,9 +482,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm->mmu_notifier_count++; need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end, range->flags); - need_tlb_flush |= kvm->tlbs_dirty; /* we've to flush the tlb before the pages can be freed */ - if (need_tlb_flush) + if (need_tlb_flush || kvm->tlbs_dirty) kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); @@ -1290,6 +1289,7 @@ int __kvm_set_memory_region(struct kvm *kvm, return -EINVAL; /* We can read the guest memory with __xxx_user() later on. 
*/ if ((mem->userspace_addr & (PAGE_SIZE - 1)) || + (mem->userspace_addr != untagged_addr(mem->userspace_addr)) || !access_ok((void __user *)(unsigned long)mem->userspace_addr, mem->memory_size)) return -EINVAL;