From defa358fefceda186329f816e9729b757610c40a Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 9 Mar 2026 15:18:04 +0100
Subject: [PATCH] Revert "Merge pull request #52 from brauner/work"

This reverts commit 3564bbf87c0f5a6935e85f4fbd7c773a4fb7cc8d, reversing
changes made to f6ea44fa553b157fb2722f93d3342d8b5f59b3e9.

This turned out to be too ugly to live. So undo it.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 .github/ISSUE_TEMPLATE/feature-idea.yml       |   53 -
 .github/PULL_REQUEST_TEMPLATE.md              |   17 -
 .github/workflows/gh-pages.yml                |   16 +-
 CONTRIBUTING.md                               |   77 --
 README.md                                     | 1209 ++++++++++++++++-
 website/README.md                             |   48 +-
 website/archetypes/default.md                 |    6 +-
 website/assets/_custom.scss                   |   74 +-
 website/assets/_variables.scss                |    5 +-
 website/config.toml                           |   20 +-
 website/content/_index.md                     |   42 -
 website/content/completed/_index.md           |    7 -
 ...liable-way-to-check-for-pid-namespacing.md |   28 -
 ...call-to-query-information-about-a-mount.md |   37 -
 ...itional-identifiers-for-pidfds-in-statx.md |   18 -
 ...ng-idmapped-mounts-from-idmapped-mounts.md |   43 -
 ...ermine-the-parent-process-id-of-a-pidfd.md |   28 -
 ...ption-of-scm-rights-for-af-unix-sockets.md |   53 -
 .../completed/idmapped-mounts-for-tmpfs.md    |   31 -
 ...statx-on-a-pidfd-return-additional-info.md |   28 -
 ...tory-instead-of-the-top-level-directory.md |   34 -
 ...thout-rescanning-of-proc-self-mountinfo.md |   26 -
 .../namespace-binfmt-misc-filesystem.md       |   17 -
 ...-translate-a-pid-between-pid-namespaces.md |   17 -
 ...pace-to-have-an-idmapping-when-attached.md |   18 -
 .../scm-pidfd-auxiliary-socket-message.md     |   21 -
 .../security-hook-for-create-user-ns.md       |   15 -
 .../completed/set-comm-field-before-exec.md   |   35 -
 .../support-idmapped-mounts-for-overlayfs.md  |   15 -
 ...t-if-into-account-for-routing-decisions.md |   25 -
 .../unmounting-of-obstructed-mounts.md        |   24 -
 website/content/in-progress/_index.md         |    9 -
 ...et-entrypoint-inodes-in-the-file-system.md |   41 -
 .../add-immutable-rootfs-nullfs.md            |   37 -
 .../allow-move-mount-beneath-on-the-rootfs.md |   48 -
 ...mpty-mntns-and-clone3-clone-empty-mntns.md |   18 -
 ...custom-rootfs-via-open-tree-and-fsmount.md |   39 -
 ...tion-via-file-descriptor-with-statmount.md |   22 -
 ...support-detached-mounts-with-pivot-root.md |   20 -
 website/content/wishlist/_index.md            |    9 -
 .../a-timeout-for-the-flock-syscall.md        |   20 -
 .../ability-to-only-open-regular-files.md     |   20 -
 ...ability-to-reopen-a-struct-block-device.md |   18 -
 ...nt-setattr-locked-flag-to-mount-setattr.md |   22 -
 .../add-process-by-pidfd-to-a-cgroup.md       |   26 -
 .../content/wishlist/asynchronous-close.md    |   29 -
 ...pty-path-support-for-openat-and-openat2.md |   18 -
 .../at-empty-path-support-for-unlinkat.md     |   11 -
 .../automatic-growing-of-btrfs-filesystems.md |   30 -
 ...-message-describing-the-sender-s-cgroup.md |   21 -
 website/content/wishlist/blobfs.md            |   22 -
 .../clock-monotonic-network-timestamps.md     |   21 -
 ...lone-pidfd-autokill-semantics-for-pid-1.md |   37 -
 ...mount-point-belongs-to-the-current-user.md |   27 -
 ...rd-to-allow-mknod-in-non-initial-userns.md |   28 -
 ...ses-watched-via-pidfd-from-waitid-p-all.md |   22 -
 ...g-with-classic-synchronous-system-calls.md |   14 -
 ...-mount-properties-ignoring-any-failures.md |   12 -
 ...hing-to-all-new-namespaces-of-a-process.md |   18 -
 .../filtering-on-received-file-descriptors.md |   16 -
 .../immutable-layers-for-overlayfs.md         |   18 -
 .../immutable-loopback-block-devices.md       |   23 -
 .../inotify-events-for-bsd-file-locks.md      |   26 -
 .../wishlist/ioctl-api-for-overlayfs.md       |   23 -
 ...ing-of-o-tmpfile-files-with-replacement.md |   21 -
 .../make-quotas-work-with-user-namespaces.md  |   18 -
 ...fferent-uids-gids-to-the-same-uids-gids.md |   17 -
 .../namespaced-loop-and-block-devices.md      |   14 -
 ...open-thread-group-leader-via-pidfd-open.md |   18 -
 ...ath-based-acl-management-in-an-lsm-hook.md |   17 -
 .../per-cgroup-limit-for-coredump-sizes.md    |   18 -
 ...creation-and-opening-of-non-file-inodes.md |   31 -
 .../race-free-mounting-of-block-devices.md    |   18 -
 .../read-only-propagation-of-mounts.md        |   29 -
 .../reasonable-eof-on-sock-seqpacket.md       |   17 -
 ...ling-of-selinux-dropping-scm-rights-fds.md |   19 -
 .../security-hook-for-mount-setattr.md        |   13 -
 ...yring-for-dm-verity-volume-verification.md |   17 -
 .../unlinking-via-two-file-descriptors.md     |   22 -
 .../wishlist/upgrade-masks-in-openat2.md      |   13 -
 ...l-handling-of-lsm-denials-on-scm-rights.md |   41 -
 website/content/wishlist/xattrs-for-pidfd.md  |   16 -
 .../partials/docs/inject/content-before.html  |   19 -
 website/layouts/partials/docs/links/edit.html |    2 +-
 84 files changed, 1221 insertions(+), 2011 deletions(-)
 delete mode 100644 .github/ISSUE_TEMPLATE/feature-idea.yml
 delete mode 100644 .github/PULL_REQUEST_TEMPLATE.md
 delete mode 100644 CONTRIBUTING.md
 delete mode 100644 website/content/_index.md
 delete mode 100644 website/content/completed/_index.md
 delete mode 100644 website/content/completed/a-reliable-way-to-check-for-pid-namespacing.md
 delete mode 100644 website/content/completed/a-system-call-to-query-information-about-a-mount.md
 delete mode 100644 website/content/completed/additional-identifiers-for-pidfds-in-statx.md
 delete mode 100644 website/content/completed/allow-creating-idmapped-mounts-from-idmapped-mounts.md
 delete mode 100644 website/content/completed/api-to-determine-the-parent-process-id-of-a-pidfd.md
 delete mode 100644 website/content/completed/disabling-reception-of-scm-rights-for-af-unix-sockets.md
 delete mode 100644 website/content/completed/idmapped-mounts-for-tmpfs.md
 delete mode 100644 website/content/completed/make-statx-on-a-pidfd-return-additional-info.md
 delete mode 100644 website/content/completed/mount-a-subdirectory-instead-of-the-top-level-directory.md
 delete mode 100644 website/content/completed/mount-notifications-without-rescanning-of-proc-self-mountinfo.md
 delete mode 100644 website/content/completed/namespace-binfmt-misc-filesystem.md
 delete mode 100644 website/content/completed/namespace-ioctl-to-translate-a-pid-between-pid-namespaces.md
 delete mode 100644 website/content/completed/require-a-user-namespace-to-have-an-idmapping-when-attached.md
 delete mode 100644 website/content/completed/scm-pidfd-auxiliary-socket-message.md
 delete mode 100644 website/content/completed/security-hook-for-create-user-ns.md
 delete mode 100644 website/content/completed/set-comm-field-before-exec.md
 delete mode 100644 website/content/completed/support-idmapped-mounts-for-overlayfs.md
 delete mode 100644 website/content/completed/take-ip-unicast-if-into-account-for-routing-decisions.md
 delete mode 100644 website/content/completed/unmounting-of-obstructed-mounts.md
 delete mode 100644 website/content/in-progress/_index.md
 delete mode 100644 website/content/in-progress/ability-to-put-user-xattrs-on-s-ifsock-socket-entrypoint-inodes-in-the-file-system.md
 delete mode 100644 website/content/in-progress/add-immutable-rootfs-nullfs.md
 delete mode 100644 website/content/in-progress/allow-move-mount-beneath-on-the-rootfs.md
 delete mode 100644 website/content/in-progress/create-empty-mount-namespaces-via-unshare-unshare-empty-mntns-and-clone3-clone-empty-mntns.md
 delete mode 100644 website/content/in-progress/create-mount-namespace-with-custom-rootfs-via-open-tree-and-fsmount.md
 delete mode 100644 website/content/in-progress/query-mount-information-via-file-descriptor-with-statmount.md
 delete mode 100644 website/content/in-progress/support-detached-mounts-with-pivot-root.md
 delete mode 100644 website/content/wishlist/_index.md
 delete mode 100644 website/content/wishlist/a-timeout-for-the-flock-syscall.md
 delete mode 100644 website/content/wishlist/ability-to-only-open-regular-files.md
 delete mode 100644 website/content/wishlist/ability-to-reopen-a-struct-block-device.md
 delete mode 100644 website/content/wishlist/add-mount-setattr-locked-flag-to-mount-setattr.md
 delete mode 100644 website/content/wishlist/add-process-by-pidfd-to-a-cgroup.md
 delete mode 100644 website/content/wishlist/asynchronous-close.md
 delete mode 100644 website/content/wishlist/at-empty-path-support-for-openat-and-openat2.md
 delete mode 100644 website/content/wishlist/at-empty-path-support-for-unlinkat.md
 delete mode 100644 website/content/wishlist/automatic-growing-of-btrfs-filesystems.md
 delete mode 100644 website/content/wishlist/auxiliary-socket-message-describing-the-sender-s-cgroup.md
 delete mode 100644 website/content/wishlist/blobfs.md
 delete mode 100644 website/content/wishlist/clock-monotonic-network-timestamps.md
 delete mode 100644 website/content/wishlist/clone-pidfd-autokill-semantics-for-pid-1.md
 delete mode 100644 website/content/wishlist/determining-if-a-mount-point-belongs-to-the-current-user.md
 delete mode 100644 website/content/wishlist/device-cgroup-guard-to-allow-mknod-in-non-initial-userns.md
 delete mode 100644 website/content/wishlist/excluding-processes-watched-via-pidfd-from-waitid-p-all.md
 delete mode 100644 website/content/wishlist/extend-io-uring-with-classic-synchronous-system-calls.md
 delete mode 100644 website/content/wishlist/extend-mount-setattr-to-allow-changing-mount-properties-ignoring-any-failures.md
 delete mode 100644 website/content/wishlist/extend-setns-to-allow-attaching-to-all-new-namespaces-of-a-process.md
 delete mode 100644 website/content/wishlist/filtering-on-received-file-descriptors.md
 delete mode 100644 website/content/wishlist/immutable-layers-for-overlayfs.md
 delete mode 100644 website/content/wishlist/immutable-loopback-block-devices.md
 delete mode 100644 website/content/wishlist/inotify-events-for-bsd-file-locks.md
 delete mode 100644 website/content/wishlist/ioctl-api-for-overlayfs.md
 delete mode 100644 website/content/wishlist/linking-of-o-tmpfile-files-with-replacement.md
 delete mode 100644 website/content/wishlist/make-quotas-work-with-user-namespaces.md
 delete mode 100644 website/content/wishlist/map-different-uids-gids-to-the-same-uids-gids.md
 delete mode 100644 website/content/wishlist/namespaced-loop-and-block-devices.md
 delete mode 100644 website/content/wishlist/open-thread-group-leader-via-pidfd-open.md
 delete mode 100644 website/content/wishlist/path-based-acl-management-in-an-lsm-hook.md
 delete mode 100644 website/content/wishlist/per-cgroup-limit-for-coredump-sizes.md
 delete mode 100644 website/content/wishlist/race-free-creation-and-opening-of-non-file-inodes.md
 delete mode 100644 website/content/wishlist/race-free-mounting-of-block-devices.md
 delete mode 100644 website/content/wishlist/read-only-propagation-of-mounts.md
 delete mode 100644 website/content/wishlist/reasonable-eof-on-sock-seqpacket.md
 delete mode 100644 website/content/wishlist/reasonable-handling-of-selinux-dropping-scm-rights-fds.md
 delete mode 100644 website/content/wishlist/security-hook-for-mount-setattr.md
 delete mode 100644 website/content/wishlist/specification-of-a-keyring-for-dm-verity-volume-verification.md
 delete mode 100644 website/content/wishlist/unlinking-via-two-file-descriptors.md
 delete mode 100644 website/content/wishlist/upgrade-masks-in-openat2.md
 delete mode 100644 website/content/wishlist/useful-handling-of-lsm-denials-on-scm-rights.md
 delete mode 100644 website/content/wishlist/xattrs-for-pidfd.md
 delete mode 100644 website/layouts/partials/docs/inject/content-before.html

diff --git a/.github/ISSUE_TEMPLATE/feature-idea.yml b/.github/ISSUE_TEMPLATE/feature-idea.yml
deleted file mode 100644
index ca72c2a..0000000
--- a/.github/ISSUE_TEMPLATE/feature-idea.yml
+++ /dev/null
@@ -1,53 +0,0 @@
-name: Feature Idea
-description: Propose a new kernel feature idea
-labels: ["wishlist"]
-body:
-  - type: input
-    id: title
-    attributes:
-      label: Feature title
-      description: A short, descriptive title for the feature.
-      placeholder: "e.g., Support detached mounts with pivot_root()"
-    validations:
-      required: true
-
-  - type: textarea
-    id: description
-    attributes:
-      label: Description
-      description: Describe the feature and how it would work.
-    validations:
-      required: true
-
-  - type: textarea
-    id: usecase
-    attributes:
-      label: Use-Case
-      description: Why is this feature valuable? What problem does it solve?
-    validations:
-      required: true
-
-  - type: dropdown
-    id: category
-    attributes:
-      label: Category
-      description: Which subsystem area does this relate to?
-      multiple: true
-      options:
-        - mounts
-        - pidfd
-        - namespaces
-        - filesystems
-        - sockets
-        - cgroups
-        - block-devices
-        - security
-        - io-uring
-        - processes
-        - other
-
-  - type: textarea
-    id: links
-    attributes:
-      label: Related links
-      description: Links to mailing list threads, patches, documentation, etc.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
deleted file mode 100644
index 51eb9c0..0000000
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ /dev/null
@@ -1,17 +0,0 @@
-## Feature
-
-<!-- Title of the feature being added, updated, or completed -->
-
-## Type
-
-<!-- Check one -->
-- [ ] New wishlist item
-- [ ] Moving item to in-progress
-- [ ] Marking item as completed
-- [ ] Other (describe below)
-
-## Checklist
-
-- [ ] Front matter is complete (`title`, `status`, `categories`)
-- [ ] Use-case section is included
-- [ ] Cc: Christian Brauner noted if implementing a feature from this list
diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml
index d094f70..7c33ca7 100644
--- a/.github/workflows/gh-pages.yml
+++ b/.github/workflows/gh-pages.yml
@@ -13,7 +13,7 @@ jobs:
     concurrency:
       group: ${{ github.workflow }}-${{ github.ref }}
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
         with:
           submodules: true
           fetch-depth: 0
@@ -27,20 +27,6 @@ jobs:
       - name: Build
         run: cd website && hugo --minify -d ../public
 
-      - name: Validate front matter
-        run: |
-          errors=0
-          for f in $(find website/content -name '*.md' ! -name '_index.md'); do
-            if ! head -1 "$f" | grep -q '^---$'; then
-              echo "ERROR: $f missing front matter"
-              errors=$((errors + 1))
-            fi
-          done
-          if [ "$errors" -gt 0 ]; then
-            echo "$errors file(s) with missing front matter"
-            exit 1
-          fi
-
       - name: Deploy
         uses: peaceiris/actions-gh-pages@v3
         if: ${{ github.ref == 'refs/heads/main' }}
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
deleted file mode 100644
index c171fe3..0000000
--- a/CONTRIBUTING.md
+++ /dev/null
@@ -1,77 +0,0 @@
-# Contributing
-
-Thank you for your interest in the UAPI Group Kernel Feature Wishlist. This
-document explains how to propose new features, claim existing ones, or mark
-items as completed.
-
-## Proposing a new feature
-
-1. Create a new Markdown file in the appropriate section directory
-   under `website/content/`:
-   - `wishlist/` for new ideas
-   - `in-progress/` for features you are actively working on
-
-2. Use the following front matter template:
-
-   ```yaml
-   ---
-   title: "Short descriptive title"
-   weight: 10
-   status: wishlist
-   categories:
-     - mounts
-     - namespaces
-   ---
-   ```
-
-3. Include a clear description of the feature and a **Use-Case** section
-   explaining why this would be valuable.
-
-4. Open a pull request.
-
-## Claiming an item
-
-To indicate you are working on a wishlist item:
-
-1. Move the file from `website/content/wishlist/` to
-   `website/content/in-progress/`.
-2. Update the `status` field in the front matter to `in-progress`.
-3. Open a pull request with your GitHub handle or email address noted.
-
-## Marking an item as completed
-
-When a feature has been merged into the kernel:
-
-1. Move the file from its current location to `website/content/completed/`.
-2. Update the `status` field to `completed`.
-3. Add a `commit` field to the front matter with the commit SHA.
-4. Open a pull request.
-
-## Attribution
-
-**When implementing ideas on this list or ideas inspired by this list,
-please point that out explicitly and clearly in the associated patches
-and Cc `Christian Brauner <brauner (at) kernel (dot) org>`.**
-
-## Categories
-
-Use one or more of the following categories in front matter:
-
-- `mounts` — mount namespaces, pivot_root, move_mount, statmount
-- `pidfd` — pidfd xattrs, CLONE_PIDFD, SCM_PIDFD
-- `namespaces` — user namespaces, PID namespaces, mount namespaces
-- `filesystems` — nullfs, blobfs, overlayfs, tmpfs, binfmt_misc
-- `sockets` — AF_UNIX, SCM_RIGHTS, SCM_PIDFD
-- `cgroups` — device cgroups, coredump limits
-- `block-devices` — loop devices, dm-verity, diskseq
-- `security` — LSM hooks, user namespace restrictions
-- `io-uring` — io_uring extensions
-- `processes` — pidfd, clone3, waitid, prctl
-
-## Local development
-
-```sh
-git clone --recurse-submodules https://github.com/uapi-group/kernel-features.git
-cd kernel-features/website
-hugo server
-```
diff --git a/README.md b/README.md
index 5c972cf..ce45731 100644
--- a/README.md
+++ b/README.md
@@ -1,42 +1,1199 @@
-# Kernel Feature Wishlist
+# Kernel Features 🤞 🎁 🙏
 
-A curated collection of kernel feature ideas maintained by the
-[UAPI Group](https://uapi-group.org/). Browse the full list at
-**[uapi-group.org/kernel-features](https://uapi-group.org/kernel-features/)**.
+This is a list of kernel features that would be useful to have. The items on
+the list are strictly ideas. It is especially important to not take the items
+on this list as being implementation requests. Some of the ideas on this list
+are rather rough and unrefined. They serve as entry points for exploring the
+associated problem space.
 
-## Overview
+* **When implementing ideas on this list or ideas inspired by this list
+  please point that out explicitly and clearly in the associated patches
+  and Cc `Christian Brauner <brauner (at) kernel (dot) org>`.**
 
-This repository tracks kernel feature ideas across several categories:
+* Move the item you are working on to the In-Progress section.
+  Please add your github handle or mail address to the issue so we can
+  ping you.
 
-- **In Progress** — features currently being designed or implemented
-- **Wishlist** — ideas and proposals waiting for someone to pick them up
-- **Completed** — features that have been merged into the kernel
+## In-Progress
 
-Each feature is documented in its own page under `website/content/` with a
-description, use-case, and category tags.
+### Create empty mount namespaces via `unshare(UNSHARE_EMPTY_MNTNS)` and `clone3(CLONE_EMPTY_MNTNS)`
 
-## Contributing
+Now that we have support for `nullfs` it is trivial to allow the
+creation of completely empty mount namespaces, i.e., mount namespaces
+that only have the `nullfs` mount located at its root.
 
-See [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to:
+**Usecase:** This allows to isolate tasks in completely empty mount
+namespaces. It also allows the caller to avoid copying its current mount
+table which is useless in the majority of container workload cases.
 
-- Propose a new feature idea
-- Claim an existing wishlist item
-- Mark a feature as completed
+### Ability to put user xattrs on `S_IFSOCK` socket entrypoint inodes in the file system
 
-**When implementing ideas on this list or ideas inspired by this list,
-please point that out explicitly and clearly in the associated patches
-and Cc `Christian Brauner <brauner (at) kernel (dot) org>`.**
+Currently, the kernel only allows extended attributes in the
+`user.*` namespace to be attached to directory and regular file
+inodes. It would be tremendously useful to allow them to be
+associated with socket inodes, too.
 
-## Local development
+**Usecase:** There are two syslog RFCs in use today: RFC3164 and
+RFC5424. `glibc`'s `syslog()` API generates events close to the
+former, but there are programs which would like to generate the
+latter instead (as it supports structured logging). The two formats
+are not backwards compatible: a client sending RFC5424 messages to a
+server only understanding RFC3164 will cause an ugly mess. On Linux
+there's only a single `/dev/log` AF_UNIX/SOCK_DGRAM socket backing
+`syslog()`, which is used in a one-way, fire-and-forget style. This
+means that feature negotiation is not really possible within the
+protocol. Various tools bind mount the socket inode into `chroot()`
+and container environments, hence it would be fantastic to associate
+supported feature information directly with the inode (and thus
+outside of the protocol) to make it easy for clients to determine
+which features are spoken on a socket, in a way that survives bind
+mounts. Implementation idea would be that syslog daemons
+implementing RFC5424 could simply set an xattr `user.rfc5424` to `1`
+(or something like that) on the socket inode, and clearly inform
+clients in a natural and simple way that they'd be happy to parse
+the newer format. Also see:
+https://github.com/systemd/systemd/issues/19251 – This idea could
+also be extended to other sockets and other protocols: by setting
+some extended attribute on socket inodes, services could advertise
+which protocols they support on them. For example D-Bus sockets
+could carry `user.dbus` set to `1`, and Varlink sockets
+`user.varlink` set to `1` and so on.
+
+### Support detached mounts with `pivot_root()`
+
+The new rootfs must currently refer to an attached mount. This restriction
+seems unnecessary. We should allow the new rootfs to refer to a detached
+mount.
+
+This will allow a service- or container manager to create a new rootfs as
+a detached, private mount that isn't exposed anywhere in the filesystem and
+then `pivot_root()` into it.
+
+Since `pivot_root()` only takes path arguments the new rootfs would need to
+be passed via `/proc/<pid>/fd/<nr>`. In the long run we should add a new
+`pivot_root()` syscall operating on file descriptors instead of paths.
+
+### Create mount namespace with custom rootfs via `open_tree()` and `fsmount()`
+
+Add `OPEN_TREE_NAMESPACE` flag to `open_tree()` and `FSMOUNT_NAMESPACE` flag
+to `fsmount()` that create a new mount namespace with the specified mount tree
+as the rootfs mounted on top of a copy of the real rootfs. These return a
+namespace file descriptor instead of a mount file descriptor.
+
+This allows `OPEN_TREE_NAMESPACE` to function as a combined
+`unshare(CLONE_NEWNS)` and `pivot_root()`.
+
+When creating containers the setup usually involves using `CLONE_NEWNS` via
+`clone3()` or `unshare()`. This copies the caller's complete mount namespace.
+The runtime will also assemble a new rootfs and then use `pivot_root()` to
+switch the old mount tree with the new rootfs. Afterward it will recursively
+unmount the old mount tree thereby getting rid of all mounts.
+
+Copying all of these mounts only to get rid of them later is wasteful. With a
+large mount table and a system where thousands of containers are spawned in
+parallel this quickly becomes a bottleneck increasing contention on the
+semaphore.
+
+**Use-Case:** Container runtimes can create an extremely minimal rootfs
+directly:
+
+```c
+fd_mntns = open_tree(-EBADF, "/var/lib/containers/wootwoot", OPEN_TREE_NAMESPACE);
+```
+
+This creates a mount namespace where "wootwoot" has become the rootfs. The
+caller can `setns()` into this new mount namespace and assemble additional
+mounts without copying and destroying the entire parent mount table.
+
+### Add immutable rootfs (`nullfs`)
+
+Currently `pivot_root()` doesn't work on the real rootfs because it
+cannot be unmounted. Userspace has to do a recursive removal of the
+initramfs contents manually before continuing the boot.
+
+Add an immutable rootfs called `nullfs` that serves as the parent mount
+for anything that is actually useful such as the tmpfs or ramfs for
+initramfs unpacking or the rootfs itself. The kernel mounts a
+tmpfs/ramfs on top of it, unpacks the initramfs and fires up userspace
+which mounts the rootfs and can then simply do:
+
+```c
+chdir(rootfs);
+pivot_root(".", ".");
+umount2(".", MNT_DETACH);
+```
+
+This also means that the rootfs mount in unprivileged namespaces doesn't
+need to become `MNT_LOCKED` anymore as it's guaranteed that the
+immutable rootfs remains permanently empty so there cannot be anything
+revealed by unmounting the covering mount.
+
+**Use-Case:** Simplifies the boot process by enabling `pivot_root()` to
+work directly on the real rootfs. Removes the need for traditional
+`switch_root` workarounds. In the future this also allows us to create
+completely empty mount namespaces without risking to leak anything.
+
+### Allow `MOVE_MOUNT_BENEATH` on the rootfs
+
+Allow `MOVE_MOUNT_BENEATH` to target the caller's rootfs, enabling
+root-switching without `pivot_root(2)`. The traditional approach to
+switching the rootfs involves `pivot_root(2)` or a `chroot_fs_refs()`-based
+mechanism that atomically updates `fs->root` for all tasks sharing the
+same `fs_struct`. This has consequences for `fork()`, `unshare(CLONE_FS)`,
+and `setns()`.
+
+Instead, decompose root-switching into individually atomic, locally-scoped
+steps:
+
+```c
+fd_tree = open_tree(-EBADF, "/newroot",
+                    OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+fchdir(fd_tree);
+move_mount(fd_tree, "", AT_FDCWD, "/",
+           MOVE_MOUNT_BENEATH | MOVE_MOUNT_F_EMPTY_PATH);
+chroot(".");
+umount2(".", MNT_DETACH);
+```
+
+Since each step only modifies the caller's own state, the
+`fork()`/`unshare()`/`setns()` races are eliminated by design.
+
+To make this work, `MNT_LOCKED` is transferred from the top mount to the
+mount beneath. The new mount takes over the job of protecting the parent
+mount from being revealed. This also makes it possible to safely modify
+an inherited mount table after `unshare(CLONE_NEWUSER | CLONE_NEWNS)`:
 
 ```sh
-git clone --recurse-submodules https://github.com/uapi-group/kernel-features.git
-cd kernel-features/website
-hugo server
+mount --beneath -t tmpfs tmpfs /proc
+umount -l /proc
 ```
 
-Requires [Hugo](https://gohugo.io/) (extended edition).
+**Use-Case:** Containers created with `unshare(CLONE_NEWUSER | CLONE_NEWNS)`
+can reshuffle an inherited mount table safely. `MOVE_MOUNT_BENEATH` on the
+rootfs makes it possible to switch out the rootfs without the costly
+`pivot_root(2)` and without cross-namespace vulnerabilities.
+
+### Query mount information via file descriptor with `statmount()`
+
+Extend `struct mnt_id_req` to accept a file descriptor and introduce
+`STATMOUNT_BY_FD` flag. When a valid fd is provided and `STATMOUNT_BY_FD`
+is set, `statmount()` returns mount info about the mount the fd is on.
+
+This works even for "unmounted" mounts (mounts that have been unmounted using
+`umount2(mnt, MNT_DETACH)`), if you have access to a file descriptor on that
+mount. These unmounted mounts will have no mountpoint and no valid mount
+namespace, so `STATMOUNT_MNT_POINT` and `STATMOUNT_MNT_NS_ID` are unset in
+`statmount.mask` for such mounts.
+
+**Use-Case:** Query mount information directly from a file descriptor without
+needing the mount ID, which is particularly useful for detached or unmounted
+mounts.
+
+### `AT_EMPTY_PATH` support for `unlinkat()`
+
+**Use-Case:** When dealing with files/directories, allow passing
+around only a file descriptor without having to keep the path around
+to be able to unlink the file/directory.
+
+### `AT_EMPTY_PATH` support for `openat()` and `openat2()`
+
+To get an operable version of an `O_PATH` file descriptor, it is
+possible to use `openat(fd, ".", O_DIRECTORY)` for directories, but
+other files currently require going through
+`open("/proc/<pid>/fd/<nr>")` which depends on a functioning `procfs`.
+
+FreeBSD already has `O_EMPTY_PATH` for `openat`, while `fstatat` and
+similar functions have `AT_EMPTY_PATH`.
+
+**Use-Case:** When dealing with `O_PATH` file descriptors, allow
+re-opening an operable version without the need of `procfs`.
+
+---
+
+### TODO
+
+### xattrs for pidfd
+
+Since pidfds have been moved to a separate pidfs filesystem it is easy
+to add support for xattrs on pidfds. That could be valuable to store
+meta information along the pidfd. Storing an xattr should probably make
+the pidfd automatically persistent, i.e., the reference for the dentry
+is only put once the task is reaped.
+
+**Use-Case:** Store meta information alongside pidfds.
+
+### `CLONE_PIDFD_AUTOKILL` semantics for PID 1
+
+Allow obtaining a `CLONE_PIDFD_AUTOKILL` pidfd for PID 1. Currently
+PID 1 cannot hand off an autokill pidfd for itself. Allowing this would
+make it possible to create system-death-traps where the lifetime of
+PID 1 is tied to another process. PID 1 creates a `CLONE_PIDFD_AUTOKILL`
+pidfd for itself, hands it off to another task, and closes its own copy.
+If that other task exits, PID 1 is taken down.
+
+**Use-Case:** Tie the lifetime of PID 1 to a critical process such as a
+software TPM or other security-sensitive daemon. This ensures the system
+is brought down if the critical process dies, rather than continuing to
+run in a potentially compromised state.
+
+**Considerations:** When PID 1 is spawned there is no mechanism to start
+it with a pidfd right away. There are two possible approaches:
+
+1. Place a pidfd at file descriptor position 3 in PID 1's file descriptor
+   table before `exec()`, similar to how the coredump usermodehelper works.
+   After `exec()` PID 1 knows that it already has an autokill pidfd for
+   itself opened at fd 3.
+
+2. Allow opening an autokill pidfd via `pidfd_open()`. This would require
+   mutual exclusion with `CLONE_PIDFD_AUTOKILL`: if an autokill pidfd
+   already exists from `clone3()` then no new autokill pidfd can be
+   created via `pidfd_open()`. This guarantees clean semantics.
+
+Permission checking would have to be strict. It should probably only be
+allowed for the current thread-group leader on itself.
+
+### inotify() events for BSD file locks
+
+BSD file locks (i.e. `flock()`, as opposed to POSIX `F_SETLK` and
+friends) are inode-focussed, hence it would be great if one could get
+asynchronous notification when they are released via inotify.
+
+**Use-Case:** udevd probes block devices whenever they pop up to
+create /dev/disk/by-label/* and similar symlinks. Formatting tools
+can temporarily block this behaviour by taking a BSD file lock on
+the block device (as per https://systemd.io/BLOCK_DEVICE_LOCKING),
+in order to make sure udevd doesn't probe file systems/partition
+tables that are only partially initialized. Currently, udevd uses
+inotify `IN_CLOSE_WRITE` notifications to detect whenever
+applications close a block device after writing to it, and
+automatically reprobes the device. This works reasonably OK given
+that block devices are usually closed at the same time as their
+BSD file lock is released, and vice versa. However, this is not
+fully correct: what udevd actually should be watching is the locks
+being released, not the devices being closed.
+
+### Auxiliary socket message describing the sender's cgroup
+
+`SCM_CGROUPID` or a similar auxiliary socket message, that allows
+receivers to figure out which cgroup a sender is part of.
+
+**Use-Case:** `systemd-journald` picks up cgroup information from
+logging clients, in order to augment log records and allow
+filtering via this meta-information. In particular it derives
+service identity from that (so that requests such as "Show me all
+log messages of service X!" can be answered). This is currently
+racy, since it uses `SCM_CREDS`' `.pid` field for this, which it then
+uses to load `/proc/$PID/cgroup`. In particular for programs that
+log and immediately exit, the cgroup information frequently cannot
+be acquired anymore by `systemd-journald`.
+
+### Linking of `O_TMPFILE` files with replacement
+
+Ability to link an `O_TMPFILE` file into a directory while *replacing* an
+existing file. (Currently there's only the ability to link it in, if the
+file name doesn't exist yet.)
+
+**Use-Case:** there are many programs (e.g. `systemd-hostnamed`
+when updating `/etc/hostname`) that atomically want to update a
+file, so that either the old or the new version is in place, but
+never a partially updated one. The canonical way to do this is by
+creating a temporary file with the new contents, and then renaming
+it to the filename of the file to update, thus atomically replacing
+it. Currently, the temporary file for this must be created with a
+random name, `O_TMPFILE` cannot be used, since for these files
+atomic-replace is not supported, currently.
+
+### Ability to only open regular files
+
+`O_REGULAR` (inspired by the existing `O_DIRECTORY` flag for
+`open()`), which opens a file only if it is of type `S_IFREG`.
+
+**Use-Case:** this would be very useful to write secure programs
+that want to avoid being tricked into opening device nodes with
+special semantics while thinking they operate on regular
+files. This is particularly relevant as many device nodes (or even
+FIFOs) come with blocking I/O (or even blocking `open()`!) by
+default, which is not expected from regular files backed by "fast"
+disk I/O. Consider implementation of a naive web browser which is
+pointed to `file:///dev/zero`, not expecting an endless amount of
+data to read.
+
+### Unlinking via two file descriptors
+
+`unlinkat3(dir_fd, name, inode_fd)`: taking one file descriptor
+for the directory to remove a file in, and another one referring
+to the inode of the filename to remove. This call should only
+succeed if the specified filename still refers to the specified
+inode.
+
+**Use-Case:** code that operates on a well-known path that might be
+shared by multiple programs that jointly manage it might want to
+safely remove a filename under the guarantee it still refers to
+the expected inode. As a specific example, consider lock files,
+that should be cleaned up only if they still refer to the assumed
+owner's instance, but leave the file in place if another process
+already took over the filename.
+
+### Determining if a mount point belongs to the current user
+
+Ability to determine if a mount point belongs to the current user
+namespace, in order to check if there's a chance a process can
+safely unmount it (as that only works for mounts owned by the same
+user namespace — or one further down the tree, but not any up the
+tree). A simple, additional field in `/proc/self/mountinfo`
+containing the owning user namespace ID would probably already
+suffice.
+
+**Use-Case:** the `systemd` system and service manager tries to unmount
+all established mounts on shutdown. Inside of container
+environments where specific mounts are established by the
+container manager (and not the payload itself) this will
+ultimately fail if user namespaces are enabled. In order to clean
+up the shutdown logic it would be very good to be able to
+determine whether a specific mount could even possibly be
+unmounted or whether it's not worth the effort to include the
+unmount in the system shutdown transaction.
+
+### Read-only propagation of mounts
+
+A way to mark mounts that receive mount propagation events from
+elsewhere so that these propagated mounts are established
+read-only implicitly. Right now, if a mount receives a mount
+propagation event it will have the exact same `MS_RDONLY`,
+`MS_NODEV`, … flags as it has where it originated. It would be
+very useful if an `MS_RDONLY` could be ORed into the mount flags
+automatically whenever propagated elsewhere.
+
+**Use-Case:** various mount namespace based sandboxes
+(e.g. `systemd`'s `ProtectSystem=` option) mark large parts of the
+host file hierarchy read-only via mounting it
+`MS_RDONLY|MS_BIND|MS_REMOUNT`, but generally intend to leave the
+file hierarchy besides that the way it is, and that includes they
+typically still want to be able to receive mount events to
+directories such as `/mnt/` and `/media/` in these sandboxed
+environments. Right now, any such propagation then happens in
+writable mode, even if the file hierarchy otherwise is almost
+entirely read-only. To close this gap it would be great if such
+propagated mounts could implicitly gain `MS_RDONLY` as they are
+propagated.
+
+### Filtering on received file descriptors
+
+An alternative to the previous item could be if some form of filtering
+could be enforced on the file descriptors suitable for enqueuing on
+the `AF_UNIX` socket. i.e. allow filtering by superblock type or
+similar, so that policies such as "only `memfd`s are OK to be
+received" may be expressed. (BPF?).
+
+**Use-Case:** as above.
+
+### Excluding processes watched via `pidfd` from `waitid(P_ALL, …)`
+
+**Use-Case:** various programs use `waitid(P_ALL, …)` to collect exit
+information of exited child processes. In particular PID 1 and
+processes using `PR_SET_CHILD_SUBREAPER` use this as they may
+collect unexpected children that have been reparented from dying
+sub-processes, and that need to be reaped in order to clean up the
+PID space. Currently, these programs cannot easily mix waiting for
+specific sub-processes via `pidfd` with waiting for the other
+*unexpected* children via `waitid(P_ALL, …)` since the latter also
+reaps (and thus invalidates) the pidfd-tracked
+children. Specifically, the `systemd` service manager would like
+to use `pidfd`s to remove PID recycling security issues, but
+currently cannot as it also needs to generically wait for such
+unexpected children.
+
+### Asynchronous `close()`
+
+An asynchronous or forced `close()`, that guarantees that
+userspace doesn't have to risk blocking for longer periods of time
+when trying to get rid of unwanted file descriptors, possibly
+received via `recvmsg()` + `SCM_RIGHTS` (see above). Currently,
+`close()` of various file descriptors (for example those referring
+to slow storage, e.g. non-responding NFS servers and such) might
+take arbitrary amounts of time, potentially into the minute range
+and more. This makes it risky accepting file descriptors on
+publicly accessible `AF_UNIX` sockets, the way IPC brokers
+(e.g. D-Bus) do it: if a rogue client keeps sending file
+descriptors that, because unexpected, must be closed immediately, it
+might cause the receiving process to effectively crawl, when it is
+busy closing them all. A special form of `close()` that simply
+detaches a file descriptor from the file descriptor table without
+blocking on IO in any form would be great to close this issue.
+
+**Use-Case:** any program that receives file descriptors via `AF_UNIX`
+from untrusted clients would benefit from this. e.g. D-Bus
+brokers.
+
+### `CLOCK_MONOTONIC` network timestamps
+
+Currently network timestamps are exclusively in `CLOCK_REALTIME`, even
+though for many (most?) a monotonic clock would be much preferable, as
+calculations become easier when one doesn't have to think about clock
+jumps and similar.
+
+**Use-Case:** `systemd-journald` collects implicit timestamps via
+`AF_UNIX` time-stamping, in `CLOCK_REALTIME`, even though for its
+internal logic only monotonic timestamps are used, as log records
+are searched via bisection in ordered tables, that require
+strictly increasing timestamps. In particular during boot (where
+`CLOCK_REALTIME` is often not available, stable or subject to
+corrections) it would be good to have reliable, monotonic
+timestamps on all log records.
+
+### Immutable loopback block devices
+
+Truly immutable loopback block devices. Right now setting up a
+loopback block device in read-only mode, backed by a read-only
+file (stored on a regular read/write file system), and then
+mounting it with `ext4` also in `MS_RDONLY` mode *will* result in
+changes to the file, quite unexpectedly 🤯. Ideally, if a loopback
+block device is set up in read-only mode this should guarantee
+that the backing file remains unmodified by it.
+
+**Use-Case:** disk image build tools that want to reproducibly and
+verifiably build images must be able to rely that mounting them in
+read-only mode does not alter the images in any way. In particular
+when working in computer forensics one must be able to rely that
+file systems that are analyzed remain unmodified by the analysis.
+
+### A timeout for the `flock()` syscall
+
+A timeout for the `flock()` syscall. Faking the time-out in userspace
+is nasty: most code does it with `alarm()` (or equivalent APIs), but
+that's racy since on a heavily loaded system the timeout might trigger
+before the `flock()` call is entered, in particular if short time-outs
+shall be used. More accurate is to do the locking in a short-lived
+child process, but that's difficult already in C, and almost
+impossible in languages that do not allow `fork()` without `execve()`.
+
+**Use-Case:** as mentioned above systemd-udev allows synchronizing
+block device probing via flock(). Often userspace wants to wait
+for that, but without risking to hang forever.
+
+### Extend `mount_setattr()` to allow changing mount properties ignoring any failures
+
+**Use-Case:** workloads that know that there are mounts in a mount tree
+whose attributes cannot be changed by the caller don't want
+`mount_setattr()` to fail on the first mount it failed to convert. Give
+them a flag to request changes ignoring failures.
+
+### Upgrade masks in `openat2()`
+
+Add upgrade masks to `openat2()`. Extend `struct open_how` to allow
+restricting re-opening of file descriptors.
+
+**Use-Case:** block services or containers from re-opening/upgrading an
+`O_PATH` file descriptor through e.g. `/proc/<pid>/fd/<nr>` as `O_WRONLY`.
+
+### Make quotas work with user namespaces
+
+The quota codepaths in the kernel are currently broken and inconsistent
+and most interesting operations are guarded behind
+`capable(CAP_SYS_ADMIN)`, i.e., require `CAP_SYS_ADMIN` in the initial
+user namespace. We should rework these codepaths to work with user
+namespaces and then see whether we can make them work with idmapped
+mounts.
+
+**Use-Case:** using quotas correctly in containers.
+
+### Add `MOUNT_SETATTR_LOCKED` flag to `mount_setattr()`
+
+Add a new `MOUNT_SETATTR_LOCKED` flag to `mount_setattr(..., ..., MOUNT_SETATTR_LOCKED, ..., ...)`.
+The `MOUNT_SETATTR_LOCKED` flag allows a `ns_capable(mntns->user_ns,
+CAP_SYS_ADMIN)` caller to lock all mount properties. The mount properties
+cannot be changed anymore.
+
+**Use-Case:** allowing processes to lock mount properties even for
+privileged processes. Locking mount properties would currently involve
+having to have the mount namespace of the container be owned by an ancestor
+user namespace. But this doesn't just lock a single mount or mount subtree
+it locks all mounts in the mount namespace, i.e., the mount table cannot be
+altered.
+
+### Extend `setns()` to allow attaching to all new namespaces of a process
+
+Add an extension to `setns()` to allow attaching to all namespaces of
+a process `SETNS_PIDFD_ALL` different from the caller's namespaces.
+Currently specifying e.g., `CLONE_NEWUSER` fails if the caller is in the
+same user namespace as the target process. This is very inconvenient.
+
+**Use-Case:** Make it trivial to attach to all namespaces of a process
+without having to figure out whether the caller is already in the same
+namespace or not.
+
+### Security hook for `mount_setattr()`
+
+(kAPI) Add security hook to `mount_setattr()`.
+
+**Use-Case:** Allow LSMs to make decisions about what mount properties to
+allow and what to deny.
+
+### Per-cgroup limit for coredump sizes
+
+A per-cgroup knob for coredump sizes. Currently coredump size
+control is strictly per process, and primarily under control of
+the processes themselves. It would be good if we had a per-cgroup
+knob instead, that is under control of the service manager.
+
+**Use-Case:** coredumps can be heavy to generate. For different
+usecases it would be good to be able to opt-in or opt-out
+dynamically from coredumps for specific services, at runtime
+without restarting them.
+
+### Race-free creation and opening of non-file inodes
+
+A way to race-freely create a (non-file) inode and immediately
+open it. For regular files we have open(O_CREAT) for creating a
+new file inode, and returning a pinning fd to it. This is missing
+for other inode types, such as directories, device nodes,
+FIFOs. The lack of such functionality means that when populating a
+directory tree there's always a race involved: the inodes first
+need to be created, and then opened to adjust their
+permissions/ownership/labels/timestamps/acls/xattrs/…, but in the
+time window between the creation and the opening they might be
+replaced by something else. Addressing this race without proper
+APIs is possible (by immediately fstat()ing what was opened, to
+verify that it has the right inode type), but difficult to get
+right. Hence, mkdirat_fd() that creates a directory *and* returns
+an O_DIRECTORY fd to it would be great. As would be mknodeat_fd()
+that creates a device node, FIFO or (dead) socket and returns an
+O_PATH fd to it. And of course symlinkat_fd() that creates a
+symlink and returns an O_PATH fd to it.
+
+**Use-Case:** any program that creates/unpacks not just files, but
+directories, device nodes, fifos, and wants to ensure that they
+safely get the right attributes applied, even if other code might
+simultaneously have access to the same directory tree.
+
+### Extend `io_uring` with classic synchronous system calls
+
+The `io_uring` subsystem is open to adding classic existing synchronous
+system calls (e.g. `setns()` or `mount()` or other) to `io_uring`.
+They also said they would support adding new functionality into
+`io_uring` that is not exposed through system calls yet.
+
+### Map different uids/gids to the same uids/gids?
+
+Explore the idea of mapping different uids/gids to the same uids/gids, i.e.
+65534:1000:1 50000:1000:1. This will only work if the mount is read-only as
+the kernel wouldn't know what uid/gid would need to be put to disk
+otherwise (65534? 50000? the first one that is mapped?).
+
+**Use-Case:** Delegate multiple {g,u}ids to the same user. Merging
+ownership similar to how overlayfs merges files. Bindfs
+(https://bindfs.org/docs/bindfs.1.html#sect3) allows this concept too.
+
+### blobfs
+
+[`blobfs`](https://fuchsia.dev/fuchsia-src/concepts/filesystems/blobfs)
+for Linux. i.e. a minimalistic file system, that can store
+authenticated (Verity) data files, that can be written once, and
+not be modified after that, and provide stable handles (i.e. is
+content-addressable) to them.
+
+**Use-Case:** This would deliver just about enough to place
+trusted OS resources (binaries, kernels, initrds, fs trees, other
+resources) in them, without having to trust the medium and IO
+underneath. Should be simple enough to even implement in a boot
+loader and similar, without making things vulnerable to rogue file
+system image attacks. The OS and its payloads (apps, containers,
+…) could then be composed from these resources, through means like
+overlayfs, namespacing and more.
+
+### Namespaced loop and block devices
+
+Namespace-able loop and block devices, usable inside user namespaces.
+
+**Use-Case:** Allow mounting images inside nspawn containers, and using
+RootImage= and friends in the systemd user manager.
+
+### Device cgroup guard to allow `mknod()` in non-initial userns
+
+If a container manager restricts its unprivileged (user namespaced)
+children by a device cgroup, it is not necessary to deny `mknod()`
+anymore. Thus, user space applications may map devices on different
+locations in the file system by using `mknod()` inside the container.
+
+**Use-Case:** A use case for this, which is applied by users of GyroidOS,
+is to run `virsh` for VMs inside an unprivileged container. `virsh` or
+libvirt creates device nodes, e.g., `/var/run/libvirt/qemu/11-fgfg.dev/null`
+which currently fails in a non-initial userns, even if a cgroup device white
+list with the corresponding major, minor of `/dev/null` exists. Thus, in
+this case the usual bind mounts or pre populated device nodes under `/dev`
+are not sufficient.
+
+An initial group internal RFC exists in
+(https://github.com/quitschbo/linux/tree/devcg_guard_rfc).
+See commit message for more implementation specific details.
+
+### Race-free mounting of block devices
+
+Introduce a new struct to `fsconfig()` as an alternative to the
+`source` property. The struct contains at least a pointer to a path,
+possibly a device minor and major, and a diskseq number. The VFS can
+expose a helper that filesystems can call and use the diskseq number
+to verify that the block device they are intending to mount is indeed
+the one they want to mount.
+
+**Use-Case:** Race-free mounting of block devices.
+
+### Ability to reopen a `struct block_device`
+
+Add ability to reopen a `struct block_device`. This would allow using
+`blkdev_get_by_path()`/`blkdev_get_{part,whole}()` to claim a device
+with `BLK_OPEN_READ` and later on reopen with
+`BLK_OPEN_READ | BLK_OPEN_WRITE`. This in turn would allow opening block
+devices at `fsconfig(FS_CONFIG_SET_*)` time and then at `fill_super()`
+time we would be able to reopen in case the `!(fc->sb_flags & SB_RDONLY)`.
+Overall this has the effect that we're able to open devices early
+giving the user early errors when they set mount options rather than
+very late when the superblock is created.
+
+### Specification of a keyring for dm-verity volume verification
+
+When activating a dm-verity volume allow specifying a keyring to
+validate root hash signature against.
+
+**Usecase:** In systemd, we'd like to authenticate Portable Service
+images, system extension images, configuration images, container
+images with different keys, as they typically originate from
+different sources and it should not be possible to generate a
+system extension with a key pair that is supposed to be good for
+container images only.
+
+### Path-based ACL management in an LSM hook
+
+The LSM module API should have the ability to do path-based (not
+just inode-based) ACL management.
+
+**Usecase:** This would be useful in BPF-LSM modules such as
+systemd's `mntfsd` which allows unprivileged file system mounts in
+some cases, and which would like to restrict ACL handling based on
+the superblock involved.
+
+### Immutable layers for `overlayfs`
+
+`overlayfs` should permit *immutable* layers, i.e. layers whose
+non-directory inodes may not be overridden in an upper writable
+layer.
+
+**Usecase:** This would be useful when implementing `/etc/` as a
+stack of overlayfs layers, each shipping configuration for a
+different facet of the system, with a writable layer on the top for
+local modifications. In such a scenario it would be useful to allow
+the user to change any configuration it likes, except for the files
+and other inodes shipped in the lower layers.
+
+### `ioctl()` API for `overlayfs`
+
+`overlayfs` should have an `ioctl()`-based API (or similar) for
+querying information of the backing file systems/block devices.
+
+**Usecase:** In systemd in various areas we automatically find the
+block device backing the root file system and other file systems
+(Example: `systemd-gpt-auto-generator` or `bootctl` will try to find
+auxiliary file systems of the OS image by looking in the GPT
+partition table the root file system is located in). While this
+logic is good enough to find the backing block devices of some more
+complex storage such as dm-crypt, dm-verity or btrfs, once
+`overlayfs` is used as backing for the root file system this logic
+does not work anymore. It would be great if there was an API to
+simply query `overlayfs` for the superblock information
+(i.e. `.st_dev`) of the backing layers.
+
+### Automatic growing of `btrfs` filesystems
+
+An *auto-grow* feature in `btrfs` would be excellent.
+
+If such a mode is enabled, `btrfs` would automatically grow a file
+system up to the size of its backing block devices. Example: btrfs
+is created with 200M in size on a block device 2G in size. Once the
+file system is filled up fully, `btrfs` would automatically grow the
+file system as needed in the increments it needs, up to the 2G that
+the backing block device is in size.
+
+**Usecase:** This would allow creating minimal, compact file
+systems: just create them small on a sparse block device, and copy
+files into it, as needed, create subvolumes and whatever else is
+desired. As long as only files are created and written (but not
+modified) the resulting fs should be automatically minimal in size.
+This would specifically be useful in `systemd-homed`, which
+maintains per-user `btrfs` file systems backed by block
+devices. Currently, `homed` grows the file systems manually on login
+and then shrinks them again on logout, but this is less than ideal,
+since btrfs places files all over the backing store, and thus the
+shrinking will generate a lot of nonsensical IO that could be
+reduced if the file system was always kept minimal in size anyway.
+
+### Add process by PIDFD to a cgroup
+
+At the moment the canonical way to add a process to a cgroup is by
+echoing its PID into the `cgroup.procs` attribute in the target
+cgroupfs directory of the cgroup. This is safe as long as the
+process doing so just forked off the process it wants to migrate and
+hence can control that it hasn't been reaped yet, and hence
+guarantees the PID is valid. This is racy however if "foreign"
+processes shall be moved into the cgroup.
+
+**Usecase:** In systemd, all user sessions are wrapped in scope
+units which are backed by a cgroup. The session processes moved into
+the scope unit are typically "foreign" processes, i.e. not children
+of the service manager, hence doing the movement is subject to races
+in case the process dies and its PID is quickly recycled. (This
+assumes systemd can acquire a pidfd of the foreign process without
+races, for example via `SCM_PIDFD` and `SO_PEERPIDFD` or similar.)
+
+### Open thread-group leader via `pidfd_open()`
+
+Extend `pidfd_open()` to allow opening the thread-group leader based on the
+PID of an individual thread. Currently we do support:
+
+1. `pidfd_open(1234, 0)` on a thread-group leader PID
+2. `pidfd_open(1234, PIDFD_THREAD)` on a thread
+
+Add an option to go from individual thread to thread-group leader.
+
+**Use-Case:** Allow for a race free way to go from individual thread
+to thread-group leader pidfd.
+
+### Useful handling of LSM denials on SCM_RIGHTS
+
+Right now if some LSM such as SELinux denies an `AF_UNIX` socket peer
+to receive an `SCM_RIGHTS` fd the `SCM_RIGHTS` fd array will be cut
+short at that point, and `MSG_CTRUNC` is set on return of
+`recvmsg()`. This is highly problematic behaviour, because it leaves
+the receiver wondering what happened. As per man page `MSG_CTRUNC` is
+supposed to indicate that the control buffer was sized too short, but
+suddenly a permission error might result in the exact same flag being
+set. Moreover, the receiver has no chance to determine how many fds
+got originally sent and how many were suppressed.
+
+Ideas how to improve things:
+
+1. Maybe introduce a new flag `MSG_RIGHTS_DENIAL` or so which is set
+   on `recvmsg()` return, which tells us that fds were dropped from
+   the `SCM_RIGHTS` array because of an LSM error. This new flag could
+   be set in addition to `MSG_CTRUNC`, for compatibility.
+
+2. Maybe, define a new flag `MSG_RIGHTS_FILTER` or so which when
+   passed to `recvmsg()` will ensure that the `SCM_RIGHTS` fd array is
+   always passed through in its full, original size. Entries for which
+   an LSM says no are suppressed, and replaced by a special value, for
+   example `-EPERM`.
+
+3. It would be good if the relevant man page would at least document
+   this pitfall, even if it right now cannot reasonably be handled.
+
+Ideally both ideas would be implemented, but of course, strictly
+speaking the 2nd idea makes the 1st idea half-way redundant.
+
+**Use-Case:** Any code that uses `SCM_RIGHTS` generically (D-Bus and
+so on) needs this, so that it can reasonably handle SELinux AVC errors
+on received messages.
+
+### Reasonable EOF on SOCK_SEQPACKET
+
+Zero size datagrams cannot be distinguished from EOF on
+`SOCK_SEQPACKET`. Both will cause `recvmsg()` to return zero.
+
+Idea how to improve things: maybe define a new MSG_XYZ flag for this,
+which causes either of the two cases to result in some recognizable error
+code returned rather than a 0.
+
+**Use-Case:** Any code that wants to use `SOCK_SEQPACKET` and cannot
+afford disallowing zero sized datagrams from their protocol.
+
+### Reasonable Handling of SELinux dropping SCM_RIGHTS fds
+
+Currently, if SELinux refuses to let some file descriptor through, it
+will just drop them from the `SCM_RIGHTS` array. That's a terrible
+idea, since applications rely on the precise arrangement of the array
+to know which fd is which. By dropping entries silently, these apps
+will all break.
+
+Idea how to improve things: leave the elements in the array in place,
+but return a marker instead (i.e. negative integer, maybe `-EPERM`) that
+tells userspace that there was an fd, but it was not allowed through.
+
+**Use-Case:** Any code that wants to use `SCM_RIGHTS` properly.
+
+---
+
+## Finished Items
+
+### Namespace ioctl to translate a PID between PID namespaces
+
+[x] Namespace ioctl to translate a PID between PID namespaces
+
+**🙇 `ca567df74a28a9fb368c6b2d93e864113f73f5c2 ("nsfs: add pid translation ioctls")` 🙇**
+
+**Use-Case:** This makes it possible to e.g., figure out what a given PID in
+a PID namespace corresponds to in the caller's PID namespace. For example, to
+figure out what the PID of PID 1 inside of a given PID namespace is.
+
+### API to determine the parent process ID of a pidfd
+
+[x] API to determine the parent process ID of a pidfd
+
+An API to determine the parent process ID (ppid) of a pidfd would be
+good.
+
+This information is relevant to code dealing with pidfds, since if
+the ppid of a pidfd matches the process' own pid it can call
+`waitid()` on the process, if it doesn't it cannot and such a call
+would fail. It would be very useful if this could be determined
+easily before even calling that syscall.
+
+**🙇 `cdda1f26e74b ("pidfd: add ioctl to retrieve pid info")` 🙇**
+
+**Usecase:** systemd manages a multitude of processes, most of which
+are its own children, but many which are not. It would be great if
+we could easily determine whether it is worth waiting for
+`SIGCHLD`/`waitid()` on them or whether waiting for `POLLIN` on
+them is the only way to get exit notification.
+
+### Set `comm` field before `exec()`
+
+[x] Set `comm` field before `exec()`
+
+There should be a way to control the process' `comm` field if
+started via `fexecve()`/`execveat()`.
+
+Right now, when `fexecve()`/`execveat()` is used, the `comm` field
+(i.e. `/proc/self/comm`) contains a name derived from the numeric fd,
+which breaks `ps -C …` and various other tools.  In particular when
+the fd was opened with `O_CLOEXEC`, the number of the fd in the old
+process is completely meaningless.
+
+The goal is to add a way to tell `fexecve()`/`execveat()` what name to use.
+
+Since `comm` is under user control anyway (via `PR_SET_NAME`), it
+should be safe to also make it somehow configurable at fexecve()
+time.
+
+See https://github.com/systemd/systemd/commit/35a926777e124ae8c2ac3cf46f44248b5e147294,
+https://github.com/systemd/systemd/commit/8939eeae528ef9b9ad2a21995279b76d382d5c81.
+
+**🙇 `543841d18060 ("exec: fix up /proc/pid/comm in the execveat(AT_EMPTY_PATH) case")` 🙇**
+
+**Usecase:** In systemd we generally would prefer using `fexecve()`
+to safely and race-freely invoke processes, but the fact that `comm`
+is useless after invoking a process that way makes the call
+unfortunately hard to use for systemd.
+
+### Make statx() on a pidfd return additional info
+
+Make statx() on a pidfd return additional recognizable identifiers in
+`.stx_btime`.
+
+**🙇 `cb12fd8e0dabb9a1c8aef55a6a41e2c255fcdf4b pidfd: add pidfs` 🙇**
+
+It would be fantastic if issuing statx() on any pidfd would return
+the start time of the process in `.stx_btime` even after the process
+died.
+
+These fields should in particular be queriable *after* the process
+already exited and has been reaped, i.e. after its PID has already
+been recycled.
+
+**Usecase:** In systemd we maintain lists of processes in a hash
+table. Right now, the key is the PID, but this is less than ideal
+because of PID recycling. Being able to use the `.stx_btime`
+and/or `.stx_ino` fields instead would be perfect to safely
+identify, track and compare processes even after they ceased to exist.
+
+### Allow creating idmapped mounts from idmapped mounts
+
+[x] Allow creating idmapped mounts from idmapped mounts
+
+Add a new `OPEN_TREE_CLEAR` flag to `open_tree()` that can only be
+used in conjunction with `OPEN_TREE_CLONE`. When specified it will clear
+all mount properties from that mount including the mount's idmapping.
+Requires the caller to be `ns_capable(mntns->user_ns)`. If idmapped mounts
+are encountered the caller must be `ns_capable(sb->user_ns, CAP_SYS_ADMIN)`
+in the filesystem's user namespace.
+
+Locked mount properties cannot be changed. A mount's idmapping becomes
+locked if it propagates across user namespaces.
+
+This is useful to get a new, clear mount and also allows the caller to
+create a new detached mount with an idmapping attached to the mount. Iow,
+the caller may idmap the mount afterwards.
+
+**🙇 `c4a16820d901 ("fs: add open_tree_attr()")` 🙇**
+
+**Use-Case:** A user may already use an idmapped mount for their home
+directory. And once a mount has been idmapped the idmapping cannot be
+changed anymore. This allows for simple semantics and allows to avoid
+lifetime complexity in order to account for scenarios where concurrent
+readers or writers might still use a given user namespace while it is about
+to be changed.
+But this poses a problem when the user wants to attach an idmapping to
+a mount that is already idmapped. The new flag allows to solve this
+problem. A sufficiently privileged user such as a container manager can
+create a user namespace for the container which expresses the desired
+ownership. Then they can create a new detached mount without any prior
+mount properties via OPEN_TREE_CLEAR and then attach the idmapping to this
+mount.
+
+### Require a user namespace to have an idmapping when attached
+
+[x] Require a user namespace to have an idmapping when attached
+
+Enforce that the user namespace about to be attached to a mount must
+have an idmapping written.
+
+**🙇 `dacfd001eaf2 ("fs/mnt_idmapping.c: Return -EINVAL when no map is written")` 🙇**
+
+**Use-Case:** Tighten the semantics.
+
+### Mount notifications without rescanning of `/proc/self/mountinfo`
+
+[x] Mount notifications without rescanning of `/proc/self/mountinfo`
+
+Mount notifications that do not require continuous rescanning of
+`/proc/self/mountinfo`. Currently, if a program wants to track
+mounts established on the system it can receive `poll()`able
+events via a file descriptor to `/proc/self/mountinfo`. When
+receiving them it needs to rescan the file from the top and
+compare it with the previous scan. This is both slow and
+racy. It's slow on systems with a large number of mounts as the
+cost for re-scanning the table has to be paid for every change to
+the mount table. It's racy because quickly added and removed
+mounts might not be noticed.
+
+**🙇 `0f46d81f2bce ("fanotify: notify on mount attach and detach")` 🙇**
+
+**Use-Case:** `systemd` tracks the mount table to integrate the mounts
+into its own dependency management.
+
+### Mount a subdirectory instead of the top-level directory
+
+[x] Mount a subdirectory instead of the top-level directory
+
+Ability to mount a subdirectory of a regular file system instead of
+the top-level directory. E.g. for a file system `/dev/sda1` which
+contains a sub-directory `/foobar` mount `/foobar` without having
+to mount its parent directory first. Consider something like this:
+
+```
+mount -t ext4 /dev/sda1 somedir/ -o subdir=/foobar
+```
+
+(This is of course already possible via some mount namespacing
+shenanigans, but this requires namespacing to be available, and is
+not precisely obvious to implement. Explicit kernel support at mount
+time would be much preferable.)
+
+**🙇 `c5c12f871a30 ("fs: create detached mounts from detached mounts")` 🙇**
+
+**Use-Case:** `systemd-homed` currently mounts a sub-directory of
+the per-user LUKS volume as the user's home directory (and not the
+root directory of the per-user LUKS volume's file system!), and in
+order to implement this invisibly from the host side requires a
+complex mount namespace exercise.
+
+### Unmounting of obstructed mounts
+
+[x] ability to unmount obstructed mounts. (this means: you have a stack
+of mounts on the very same inode, and you want to remove a mount in
+the middle. right now, you can only remove the topmost mount.)
+
+**🙇 instead of the ability to unmount obstructed mounts we gained
+the ability to mount beneath an existing mount, with mostly
+equivalent outcome. `6ac392815628f317fcfdca1a39df00b9cc4ebc8b
+("fs: allow to mount beneath top mount")` 🙇**
+
+**Use-Case:** this is useful for replacing mounts atomically, for
+example for upgrading versioned disk images: first an old version
+of the image is mounted. then a new version is mounted over the
+existing mount point, and then the lower mount point is
+removed. One such software would be `systemd-sysext`.
+
+### `SCM_PIDFD` auxiliary socket message
+
+[x] `SCM_PIDFD` or similar auxiliary socket message, that is a modern
+version of the `SCM_CREDS` message's `.pid` field, and provides a
+`pidfd` file descriptor to the originating peer process.
+
+**🙇 `5e2ff6704a275be00 ("scm: add SO_PASSPIDFD and SCM_PIDFD")` 🙇**
+
+**Use-Case:** security infrastructure (such as PolicyKit) can safely
+reference clients this way without fearing PID
+recycling. `systemd-journald` can acquire peer metadata this way in
+a less racy fashion, in particular safe against PID recycling.
+
+### Take `IP_UNICAST_IF` into account for routing decisions
+
+[x] `IP_UNICAST_IF` should be taken into account for routing decisions
+at UDP `connect()` time (currently it isn't, only `SO_BINDTOINDEX`
+is, but that does so much more than just that, and one often
+doesn't want that)
+
+**🙇 `0e4d354762cefd3e16b4cff8988ff276e45effc4 ("net-next: Fix
+IP_UNICAST_IF option behavior for connected sockets")` 🙇**
+
+**Use-Case:** DNS resolvers that associate DNS configuration with
+specific network interfaces (example: `systemd-resolved`) typically
+want to preferably route DNS traffic to the per-interface DNS
+server via that interface, but not make further restrictions on the
+origins or received replies, and all that without
+privileges. `IP_UNICAST_IF` fulfills this role fine for TCP, but
+for UDP it is not taken into account for the `connect()` routing
+decision.
+
+### A system call to query information about a mount
+
+[x] Implement a mount-specific companion to `statx()` that puts at least the
+following information into `struct mount_info`:
+
+**🙇 `46eae99ef73302f9fb3dddcd67c374b3dffe8fd6 ("add statmount(2) syscall")` 🙇**
+
+* mount flags: `MOUNT_ATTR_RDONLY`, ...
+* time flags: `MOUNT_ATTR_RELATIME`, ...
+  Could probably be combined with mount flags.
+* propagation setting: `MS_SHARED`, ...
+* peer group
+* mnt id of the mount
+* mnt id of the mount's parent
+* owning userns
+
+There's a bit more advanced stuff systemd would really want but which
+I think is misplaced in a mountinfo system call including:
+* list of primary and auxiliary block device major/minor
+* diskseq value of those device nodes (This is a new block device feature
+  we added that allows preventing device recycling issues when e.g.
+  removing usb devices very quickly and is needed for udev.)
+* uuid/fsid
+* feature flags (`O_TMPFILE`, `RENAME_EXCHANGE` supported etc.)
+
+**Use-Case:** low-level userspace tools have to interact with advanced
+mount information constantly. This is currently costly and brittle because
+they have to go and parse `/proc/<pid>/mountinfo`.
+
+### Security hook for `create_user_ns()`.
+
+[x] (kAPI) Add security hook to `create_user_ns()`.
+
+**🙇 `7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()")` 🙇**
+
+**Use-Case:** Allow LSMs to monitor user namespace creation.
+
+### Idmapped mounts for tmpfs
+
+[x] Support idmapped mounts for tmpfs
+
+**🙇 `7a80e5b8c6fa ("shmem: support idmapped mounts for tmpfs")` 🙇**
+
+**Use-Case:** Runtimes such as Kubernetes use a lot of `tmpfs` mounts of
+individual files or directories to expose information to containers/pods.
+Instead of having to change ownership permanently allow them to use an
+idmapped mount instead.
+
+@rata and @giuseppe brought this suggestion forward. For Kubernetes it is
+sufficient to support idmapped mounts of `tmpfs` instances mounted in the
+initial user namespace. However, in the future idmapped
+mounts of `tmpfs` instances mounted in user namespaces should be supported.
+Other container runtimes want to make use of this. The kernel is able to
+support this since at least `5.17`.
+
+Things to remember are that `tmpfs` mounts can serve as lower- or upper
+layers in `overlayfs` and care needs to be taken that this remains safe if
+idmapped mounts of `tmpfs` instances mounted in user namespaces are
+supported.
+
+### Additional identifiers for pidfds in `statx()`
+
+[x] Make `statx()` on a pidfd return additional recognizable identifiers
+in `.stx_ino`.
+
+**🙇 `cb12fd8e0dabb9a1c8aef55a6a41e2c255fcdf4b ("pidfd: add pidfs")` 🙇**
+
+It would be fantastic if issuing statx() on any pidfd would return some
+reasonably stable 64bit identifier for the process in `.stx_ino`. This would
+be perfect to identify processes pinned by a pidfd, and compare them.
+
+### Namespace `binfmt_misc` filesystem
+
+[x] Make the `binfmt_misc` filesystem namespaced.
+
+**🙇 `21ca59b365c0 ("binfmt_misc: enable sandboxed mounts")` 🙇**
+
+**Use-Case:** Allow containers and sandboxes to register their own binfmt
+handlers.
+
+### Support idmapped mounts for `overlayfs`
+
+[x] Support idmapped mounts for `overlayfs`
+
+**🙇 `bc70682a497c ("ovl: support idmapped layers")` 🙇**
+
+**Use-Case:** Allow containers to use `overlayfs` with idmapped mounts.
+
+### Disabling reception of `SCM_RIGHTS` for `AF_UNIX` sockets
+
+[x] Ability to turn off `SCM_RIGHTS` reception for `AF_UNIX`
+sockets.
+
+**🙇 `77cbe1a6d8730a07f99f9263c2d5f2304cf5e830 ("af_unix: Introduce SO_PASSRIGHTS")` 🙇**
+
+Right now reception of file descriptors is always on when
+a process makes the mistake of invoking `recvmsg()` on such a
+socket. This is problematic since `SCM_RIGHTS` installs file
+descriptors in the recipient process' file descriptor
+table. Getting rid of these file descriptors is not necessarily
+easy, as they could refer to "slow-to-close" files (think: dirty
+file descriptor referring to a file on an unresponsive NFS server,
+or some device file descriptor), that might cause the recipient to
+block for a longer time when it tries to close them. Programs reading
+from an `AF_UNIX` socket currently have three options:
+
+1. Never use `recvmsg()`, and stick to `read()`, `recv()` and
+   similar which do not install file descriptors in the recipient's
+   file descriptor table.
+
+2. Ignore the problem, and simply `close()` the received file descriptors
+   it didn't expect, thus possibly locking up for a longer time.
+
+3. Fork off a thread that invokes `close()`, which mitigates the
+   risk of blocking, but still means a sender can cause resource
+   exhaustion in a recipient by flooding it with file descriptors,
+   as for each of them a thread needs to be spawned and a file
+   descriptor is taken while it is in the process of being closed.
+
+(Another option of course is to never talk `AF_UNIX` to peers that
+are not trusted to not send unexpected file descriptors.)
+
+A simple knob that allows turning off `SCM_RIGHTS` reception
+would be useful to close this weakness, and would allow
+`recvmsg()` to be called without risking file descriptors to be
+installed in the file descriptor table, and thus risking a
+blocking `close()` or a form of potential resource exhaustion.
+
+**Use-Case:** any program that uses `AF_UNIX` sockets and uses (or
+would like to use) `recvmsg()` on it (which is useful to acquire
+other metadata). Example: logging daemons that want to collect
+timestamp or `SCM_CREDS` auxiliary data, or the D-Bus message
+broker and suchlike.
+
+### A reliable way to check for PID namespacing
+
+[x] A reliable (non-heuristic) way to detect from userspace if the
+current process is running in a PID namespace that is not the main
+PID namespace. PID namespaces are probably the primary type of
+namespace that identify a container environment. While many
+heuristics exist to determine generically whether one is executed
+inside a container, it would be good to have a correct,
+well-defined way to determine this.
 
-## License
+**🙇 The inode number of the root PID namespace is fixed (0xEFFFFFFC)
+and now considered API. It can be used to distinguish the root PID
+namespace from all others. 🙇**
 
-[MIT](LICENSE)
+**Use-Case:** tools such as `systemd-detect-virt` exist to determine
+container execution, but typically resolve to checking for
+specific implementations. It would be much nicer and universally
+applicable if such a check could be done generically. It would
+probably suffice to provide an `ioctl()` call on the `pidns` file
+descriptor that reveals this kind of information in some form.
diff --git a/website/README.md b/website/README.md
index 33eb354..a2c57c0 100644
--- a/website/README.md
+++ b/website/README.md
@@ -1,41 +1,29 @@
-# Website
+# Static website generation for UAPI group specifications
 
-This directory contains the Hugo-based static site for the kernel feature wishlist.
+This repository uses Hugo for static HTML generation.
+See https://gohugo.io/getting-started/quick-start/ for a brief intro.
 
-## Theme
+The website uses the [hugo-book](https://github.com/alex-shpak/hugo-book) theme; it is included in this repo as a git submodule.
+After cloning this repo please run `git submodule init; git submodule update`.
+If you check out a branch or tag, make sure the submodule is up to date by running `git submodule update`.
 
-The site uses the [hugo-book](https://github.com/alex-shpak/hugo-book) theme,
-included as a git submodule. After cloning, run:
+## Website repo layout
 
-```sh
-git submodule init && git submodule update
-```
-
-## Content layout
+Content resides in the [content](content/) folder.
+The top-level README.md is soft-linked to `content/_index.md` and serves as index page.
 
-```
-content/
-  _index.md              Landing page
-  in-progress/           Features being actively worked on
-  wishlist/              Ideas and proposals
-  completed/             Features merged into the kernel
-```
+To add content, put files into the `content/docs` folder.
+Documents there are automatically added to the navigation menu on the left.
 
-Each feature is a separate Markdown file with YAML front matter containing
-`title`, `status`, `categories`, and optionally `commit` (for completed items).
+## Making changes and testing
 
-## Local development
+You'll need [hugo installed](https://gohugo.io/getting-started/installing/) for rendering changes.
 
-From this directory:
+First, make your edits.
+Then, start hugo locally (in the repo's `website` directory) to review your changes:
 
-```sh
-hugo server --minify --disableFastRender
+```shell
+$ hugo server --minify --disableFastRender
 ```
 
-Review at http://localhost:1313/kernel-features/ .
-
-## Build
-
-```sh
-hugo --minify -d ../public
-```
+Review your changes at http://localhost:1313/kernel-features/ .
diff --git a/website/archetypes/default.md b/website/archetypes/default.md
index 8d45989..00e77bd 100644
--- a/website/archetypes/default.md
+++ b/website/archetypes/default.md
@@ -1,8 +1,6 @@
 ---
 title: "{{ replace .Name "-" " " | title }}"
-weight: 10
-status: wishlist
-categories: []
+date: {{ .Date }}
+draft: true
 ---
 
-**Use-Case:**
diff --git a/website/assets/_custom.scss b/website/assets/_custom.scss
index fdbda08..3859722 100644
--- a/website/assets/_custom.scss
+++ b/website/assets/_custom.scss
@@ -1,73 +1,5 @@
-// Status badges
-.status-badge {
+nav>ul:last-of-type>li:last-child::before {
+  content: "⸻";
   display: inline-block;
-  padding: 0.15em 0.6em;
-  font-size: 0.8em;
-  font-weight: 600;
-  border-radius: 4px;
-  margin-bottom: 0.5em;
-  text-transform: uppercase;
-  letter-spacing: 0.03em;
-}
-
-.status-in-progress {
-  background-color: #fff3cd;
-  color: #856404;
-}
-
-.status-wishlist {
-  background-color: #d1ecf1;
-  color: #0c5460;
-}
-
-.status-completed {
-  background-color: #d4edda;
-  color: #155724;
-}
-
-// Category pills
-.category-pills {
-  margin-bottom: 1em;
-}
-
-.category-pill {
-  display: inline-block;
-  padding: 0.1em 0.5em;
-  font-size: 0.75em;
-  border-radius: 3px;
-  margin-right: 0.3em;
-  margin-bottom: 0.3em;
-  background-color: var(--gray-100);
-  color: var(--gray-500);
-  text-decoration: none;
-
-  &:hover {
-    background-color: var(--gray-200);
-    text-decoration: none;
-  }
-}
-
-// Separator before the last menu item
-.book-menu nav > ul > li:last-child {
-  border-top: 1px solid var(--gray-200);
-  padding-top: 0.5em;
-  margin-top: 0.5em;
-}
-
-// Typography refinements
-.book-page {
-  line-height: 1.7;
-}
-
-.markdown h1 {
-  margin-top: 0;
-}
-
-// Commit reference styling
-.commit-ref {
-  font-family: monospace;
-  font-size: 0.85em;
-  background-color: var(--gray-100);
-  padding: 0.15em 0.4em;
-  border-radius: 3px;
+  margin: 0.5em 0;
 }
diff --git a/website/assets/_variables.scss b/website/assets/_variables.scss
index ef64cfd..bc8d840 100644
--- a/website/assets/_variables.scss
+++ b/website/assets/_variables.scss
@@ -1,4 +1,5 @@
 $body-min-width: 20rem;
 $container-max-width: 110rem;
-$menu-width: 16rem;
-$toc-width: 16rem;
+
+$menu-width: 15rem;
+$toc-width: 15rem;
diff --git a/website/config.toml b/website/config.toml
index 37c7a2a..8702d75 100644
--- a/website/config.toml
+++ b/website/config.toml
@@ -1,13 +1,10 @@
 baseURL = "https://uapi-group.org/kernel-features"
 languageCode = "en-us"
-title = "UAPI Group Kernel Feature Wishlist"
+title = "UAPI Group Kernel Feature Ideas"
 theme = "hugo-book"
 
-[taxonomies]
-  category = "categories"
-
 [[menu.before]]
-  name = "Back to top"
+  name = "⬅️  Back to top"
   url = "/.."
   weight = 1
 
@@ -17,14 +14,17 @@ theme = "hugo-book"
   weight = 11
 
 [markup.goldmark.renderer]
-  unsafe = true
+  unsafe = true # Allow HTML in md files
 
 [params]
   BookPortableLinks = true
   BookRepo = 'https://github.com/uapi-group/kernel-features'
   BookCommitPath = 'commit'
-  BookEditLink = '{{ .Site.Params.BookRepo }}/edit/main/website/{{ .Path }}'
-  BookSection = '/'
+  BookEditLink = '{{ .Site.Params.BookRepo }}/edit/main/{{ .Path }}'
+  BookIndexPage = 'README.md'
   BookTheme = 'auto'
-  BookSearch = true
-  BookDateFormat = "January 2, 2006"
+
+[modules]
+  [[module.mounts]]
+    source = '../README.md'
+    target = 'content/_index.md'
diff --git a/website/content/_index.md b/website/content/_index.md
deleted file mode 100644
index 66891da..0000000
--- a/website/content/_index.md
+++ /dev/null
@@ -1,42 +0,0 @@
----
-title: "Kernel Feature Wishlist"
-type: docs
----
-
-# Kernel Feature Wishlist
-
-A curated collection of kernel feature ideas maintained by the
-[UAPI Group](https://uapi-group.org/). The items on this list are strictly
-ideas. It is especially important to not take the items on this list as being
-implementation requests. Some of the ideas are rather rough and unrefined.
-They serve as entry points for exploring the associated problem space.
-
-## Sections
-
-{{< columns >}}
-
-### [In Progress]({{< relref "/in-progress" >}})
-
-Features currently being designed or implemented by contributors.
-
-<--->
-
-### [Wishlist]({{< relref "/wishlist" >}})
-
-Ideas and proposals waiting for someone to pick them up.
-
-<--->
-
-### [Completed]({{< relref "/completed" >}})
-
-Features that have been merged into the kernel.
-
-{{< /columns >}}
-
-## Contributing
-
-See the [contribution guidelines](https://github.com/uapi-group/kernel-features/blob/main/CONTRIBUTING.md) for how to propose new features, claim existing ones, or mark items as completed.
-
-**When implementing ideas on this list or ideas inspired by this list,
-please point that out explicitly and clearly in the associated patches
-and Cc `Christian Brauner <brauner (at) kernel (dot) org>`.**
diff --git a/website/content/completed/_index.md b/website/content/completed/_index.md
deleted file mode 100644
index 456b38f..0000000
--- a/website/content/completed/_index.md
+++ /dev/null
@@ -1,7 +0,0 @@
----
-title: "Completed"
-weight: 3
-bookCollapseSection: true
----
-
-Features that have been implemented and merged into the Linux kernel.
diff --git a/website/content/completed/a-reliable-way-to-check-for-pid-namespacing.md b/website/content/completed/a-reliable-way-to-check-for-pid-namespacing.md
deleted file mode 100644
index 655d8a2..0000000
--- a/website/content/completed/a-reliable-way-to-check-for-pid-namespacing.md
+++ /dev/null
@@ -1,28 +0,0 @@
----
-title: "A reliable way to check for PID namespacing"
-weight: 190
-status: completed
-categories:
-  - filesystems
-  - namespaces
-  - processes
----
-
-[x] A reliable (non-heuristic) way to detect from userspace if the
-current process is running in a PID namespace that is not the main
-PID namespace. PID namespaces are probably the primary type of
-namespace that identify a container environment. While many
-heuristics exist to determine generically whether one is executed
-inside a container, it would be good to have a correct,
-well-defined way to determine this.
-
-**🙇 The inode number of the root PID namespace is fixed (0xEFFFFFFC)
-and now considered API. It can be used to distinguish the root PID
-namespace from all others. 🙇**
-
-**Use-Case:** tools such as `systemd-detect-virt` exist to determine
-container execution, but typically resolve to checking for
-specific implementations. It would be much nicer and universally
-applicable if such a check could be done generically. It would
-probably suffice to provide an `ioctl()` call on the `pidns` file
-descriptor that reveals this kind of information in some form.
diff --git a/website/content/completed/a-system-call-to-query-information-about-a-mount.md b/website/content/completed/a-system-call-to-query-information-about-a-mount.md
deleted file mode 100644
index f800319..0000000
--- a/website/content/completed/a-system-call-to-query-information-about-a-mount.md
+++ /dev/null
@@ -1,37 +0,0 @@
----
-title: "A system call to query information about a mount"
-weight: 120
-status: completed
-categories:
-  - block-devices
-  - mounts
-  - namespaces
-commit: "46eae99ef733"
----
-
-[x] Implement a mount-specific companion to `statx()` that puts at least the
-following information into `struct mount_info`:
-
-**🙇 `46eae99ef73302f9fb3dddcd67c374b3dffe8fd6 ("add statmount(2) syscall")` 🙇**
-
-* mount flags: `MOUNT_ATTR_RDONLY`, ...
-* time flags: `MOUNT_ATTR_RELATIME`, ...
-  Could probably be combined with mount flags.
-* propagation setting: `MS_SHARED)`, ...
-* peer group
-* mnt id of the mount
-* mnt id of the mount's parent
-* owning userns
-
-There's a bit more advanced stuff systemd would really want but which
-I think is misplaced in a mountinfo system call including:
-* list of primary and auxiliary block device major/minor
-* diskseq value of those device nodes (This is a new block device feature
-  we added that allows preventing device recycling issues when e.g.
-  removing usb devices very quickly and is needed for udev.)
-* uuid/fsid
-* feature flags (`O_TMPFILE`, `RENAME_EXCHANGE` supported etc.)
-
-**Use-Case:** low-level userspace tools have to interact with advanced
-mount information constantly. This is currently costly and brittel because
-they have to go and parse `/proc/<pid>/mountinfo`.
diff --git a/website/content/completed/additional-identifiers-for-pidfds-in-statx.md b/website/content/completed/additional-identifiers-for-pidfds-in-statx.md
deleted file mode 100644
index 3541009..0000000
--- a/website/content/completed/additional-identifiers-for-pidfds-in-statx.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "Additional identifiers for pidfds in `statx()`"
-weight: 150
-status: completed
-categories:
-  - pidfd
-  - processes
-commit: "cb12fd8e0dab"
----
-
-[x] Make `statx()` on a pidfd return additional recognizable identifiers
-in `.stx_ino`.
-
-**🙇 `cb12fd8e0dabb9a1c8aef55a6a41e2c255fcdf4b pidfd: add pidfs` 🙇**
-
-It would be fantastic if issuing statx() on any pidfd would return some
-reasonably stable 64bit identifier for the process in `.stx_ino`. This would
-be perfect to identify processes pinned by a pidfd, and compare them.
diff --git a/website/content/completed/allow-creating-idmapped-mounts-from-idmapped-mounts.md b/website/content/completed/allow-creating-idmapped-mounts-from-idmapped-mounts.md
deleted file mode 100644
index 43163a9..0000000
--- a/website/content/completed/allow-creating-idmapped-mounts-from-idmapped-mounts.md
+++ /dev/null
@@ -1,43 +0,0 @@
----
-title: "Allow creating idmapped mounts from idmapped mounts"
-weight: 50
-status: completed
-categories:
-  - filesystems
-  - mounts
-  - namespaces
-  - security
-commit: "c4a16820d901"
----
-
-[x] Allow creating idmapped mounts from idmapped mounts
-
-Add a new `OPEN_TREE_CLEAR` flag to `open_tree()` that can only be
-used in conjunction with `OPEN_TREE_CLONE`. When specified it will clear
-all mount properties from that mount including the mount's idmapping.
-Requires the caller to be `ns_capable(mntns->user_ns)`. If idmapped mounts
-are encountered the caller must be `ns_capable(sb->user_ns, CAP_SYS_ADMIN)`
-in the filesystems user namespace.
-
-Locked mount properties cannot be changed. A mount's idmapping becomes
-locked if it propagates across user namespaces.
-
-This is useful to get a new, clear mount and also allows the caller to
-create a new detached mount with an idmapping attached to the mount. Iow,
-the caller may idmap the mount afterwards.
-
-**🙇 `c4a16820d901 ("fs: add open_tree_attr()")` 🙇**
-
-**Use-Case:** A user may already use an idmapped mount for their home
-directory. And once a mount has been idmapped the idmapping cannot be
-changed anymore. This allows for simple semantics and allows to avoid
-lifetime complexity in order to account for scenarios where concurrent
-readers or writers might still use a given user namespace while it is about
-to be changed.
-But this poses a problem when the user wants to attach an idmapping to
-a mount that is already idmapped. The new flag allows to solve this
-problem. A sufficiently privileged user such as a container manager can
-create a user namespace for the container which expresses the desired
-ownership. Then they can create a new detached mount without any prior
-mount properties via OPEN_TREE_CLEAR and then attach the idmapping to this
-mount.
diff --git a/website/content/completed/api-to-determine-the-parent-process-id-of-a-pidfd.md b/website/content/completed/api-to-determine-the-parent-process-id-of-a-pidfd.md
deleted file mode 100644
index 2c61dc7..0000000
--- a/website/content/completed/api-to-determine-the-parent-process-id-of-a-pidfd.md
+++ /dev/null
@@ -1,28 +0,0 @@
----
-title: "API to determine the parent process ID of a pidfd"
-weight: 20
-status: completed
-categories:
-  - pidfd
-  - processes
-commit: "cdda1f26e74b"
----
-
-[x] API to determine the parent process ID of a pidfd
-
-An API to determine the parent process ID (ppid) of a pidfd would be
-good.
-
-This information is relevant to code dealing with pidfds, since if
-the ppid of a pidfd matches the process own pid it can call
-`waitid()` on the process, if it doesn't it cannot and such a call
-would fail. It would be very useful if this could be determined
-easily before even calling that syscall.
-
-**🙇 `cdda1f26e74b ("pidfd: add ioctl to retrieve pid info")` 🙇**
-
-**Use-Case:** systemd manages a multitude of processes, most of which
-are its own children, but many which are not. It would be great if
-we could easily determine whether it is worth waiting for
-`SIGCHLD`/`waitid()` on them or whether waiting for `POLLIN` on
-them is the only way to get exit notification.
diff --git a/website/content/completed/disabling-reception-of-scm-rights-for-af-unix-sockets.md b/website/content/completed/disabling-reception-of-scm-rights-for-af-unix-sockets.md
deleted file mode 100644
index 57a5aca..0000000
--- a/website/content/completed/disabling-reception-of-scm-rights-for-af-unix-sockets.md
+++ /dev/null
@@ -1,53 +0,0 @@
----
-title: "Disabling reception of `SCM_RIGHTS` for `AF_UNIX` sockets"
-weight: 180
-status: completed
-categories:
-  - processes
-  - sockets
-commit: "77cbe1a6d873"
----
-
-[x] Ability to turn off `SCM_RIGHTS` reception for `AF_UNIX`
-sockets.
-
-**🙇 `77cbe1a6d8730a07f99f9263c2d5f2304cf5e830 ("af_unix: Introduce SO_PASSRIGHTS")` 🙇**
-
-Right now reception of file descriptors is always on when
-a process makes the mistake of invoking `recvmsg()` on such a
-socket. This is problematic since `SCM_RIGHTS` installs file
-descriptors in the recipient process' file descriptor
-table. Getting rid of these file descriptors is not necessarily
-easy, as they could refer to "slow-to-close" files (think: dirty
-file descriptor referring to a file on an unresponsive NFS server,
-or some device file descriptor), that might cause the recipient to
-block for a longer time when it tries to them. Programs reading
-from an `AF_UNIX` socket currently have three options:
-
-1. Never use `recvmsg()`, and stick to `read()`, `recv()` and
-   similar which do not install file descriptors in the recipients
-   file descriptor table.
-
-2. Ignore the problem, and simply `close()` the received file descriptors
-   it didn't expect, thus possibly locking up for a longer time.
-
-3. Fork off a thread that invokes `close()`, which mitigates the
-   risk of blocking, but still means a sender can cause resource
-   exhaustion in a recipient by flooding it with file descriptors,
-   as for each of them a thread needs to be spawned and a file
-   descriptor is taken while it is in the process of being closed.
-
-(Another option of course is to never talk `AF_UNIX` to peers that
-are not trusted to not send unexpected file descriptors.)
-
-A simple knob that allows turning off `SCM_RIGHTS` right reception
-would be useful to close this weakness, and would allow
-`recvmsg()` to be called without risking file descriptors to be
-installed in the file descriptor table, and thus risking a
-blocking `close()` or a form of potential resource exhaustion.
-
-**Use-Case:** any program that uses `AF_UNIX` sockets and uses (or
-would like to use) `recvmsg()` on it (which is useful to acquire
-other metadata). Example: logging daemons that want to collect
-timestamp or `SCM_CREDS` auxiliary data, or the D-Bus message
-broker and suchlike.
diff --git a/website/content/completed/idmapped-mounts-for-tmpfs.md b/website/content/completed/idmapped-mounts-for-tmpfs.md
deleted file mode 100644
index ab67b7e..0000000
--- a/website/content/completed/idmapped-mounts-for-tmpfs.md
+++ /dev/null
@@ -1,31 +0,0 @@
----
-title: "Idmapped mounts for tmpfs"
-weight: 140
-status: completed
-categories:
-  - filesystems
-  - mounts
-  - namespaces
-commit: "7a80e5b8c6fa"
----
-
-[x] Support idmapped mounts for tmpfs
-
-**🙇 `7a80e5b8c6fa ("shmem: support idmapped mounts for tmpfs")` 🙇**
-
-**Use-Case:** Runtimes such as Kubernetes use a lot of `tmpfs` mounts of
-individual files or directories to expose information to containers/pods.
-Instead of having to change ownership permanently allow them to use an
-idmapped mount instead.
-
-@rata and @giuseppe brought this suggestion forward. For Kubernetes it is
-sufficient to support idmapped mounts of `tmpfs` instances mounted in the
-initial user namespace. However, in the future idmapped
-mounts of `tmpfs` instances mounted in user namespaces should be supported.
-Other container runtimes want to make use of this. The kernel is able to
-support this since at least `5.17`.
-
-Things to remember are that `tmpfs` mounts can serve as lower- or upper
-layers in `overlayfs` and care needs to be taken that this remains safe if
-idmapped mounts of `tmpfs` instances mounted in user namespaces are
-supported.
diff --git a/website/content/completed/make-statx-on-a-pidfd-return-additional-info.md b/website/content/completed/make-statx-on-a-pidfd-return-additional-info.md
deleted file mode 100644
index fe169d9..0000000
--- a/website/content/completed/make-statx-on-a-pidfd-return-additional-info.md
+++ /dev/null
@@ -1,28 +0,0 @@
----
-title: "Make statx() on a pidfd return additional info"
-weight: 40
-status: completed
-categories:
-  - pidfd
-  - processes
-commit: "cb12fd8e0dab"
----
-
-Make statx() on a pidfd return additional recognizable identifiers in
-`.stx_btime`.
-
-**🙇 `cb12fd8e0dabb9a1c8aef55a6a41e2c255fcdf4b pidfd: add pidfs` 🙇**
-
-It would be fantastic if issuing statx() on any pidfd would return
-the start time of the process in `.stx_btime` even after the process
-died.
-
-These fields should in particular be queriable *after* the process
-already exited and has been reaped, i.e. after its PID has already
-been recycled.
-
-**Use-Case:** In systemd we maintain lists of processes in a hash
-table. Right now, the key is the PID, but this is less than ideal
-because of PID recycling. By being able to use the `.stx_btime`
-and/or `.stx_ino` fields instead would be perfect to safely
-identify, track and compare process even after they ceased to exist.
diff --git a/website/content/completed/mount-a-subdirectory-instead-of-the-top-level-directory.md b/website/content/completed/mount-a-subdirectory-instead-of-the-top-level-directory.md
deleted file mode 100644
index b417977..0000000
--- a/website/content/completed/mount-a-subdirectory-instead-of-the-top-level-directory.md
+++ /dev/null
@@ -1,34 +0,0 @@
----
-title: "Mount a subdirectory instead of the top-level directory"
-weight: 80
-status: completed
-categories:
-  - filesystems
-  - mounts
-  - namespaces
-commit: "c5c12f871a30"
----
-
-[x] Mount a subdirectory instead of the top-level directory
-
-Ability to mount a subdirectory of a regular file system instead of
-the top-level directory. E.e. for a file system `/dev/sda1` which
-contains a sub-directory `/foobar` mount `/foobar` without having
-to mount its parent directory first. Consider something like this:
-
-```
-mount -t ext4 /dev/sda1 somedir/ -o subdir=/foobar
-```
-
-(This is of course already possible via some mount namespacing
-shenanigans, but this requires namespacing to be available, and is
-not precisely obvious to implement. Explicit kernel support at mount
-time would be much preferable.)
-
-**🙇 `c5c12f871a30 ("fs: create detached mounts from detached mounts")` 🙇**
-
-**Use-Case:** `systemd-homed` currently mounts a sub-directory of
-the per-user LUKS volume as the user's home directory (and not the
-root directory of the per-user LUKS volume's file system!), and in
-order to implement this invisibly from the host side requires a
-complex mount namespace exercise.
diff --git a/website/content/completed/mount-notifications-without-rescanning-of-proc-self-mountinfo.md b/website/content/completed/mount-notifications-without-rescanning-of-proc-self-mountinfo.md
deleted file mode 100644
index 0fdc2c5..0000000
--- a/website/content/completed/mount-notifications-without-rescanning-of-proc-self-mountinfo.md
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Mount notifications without rescanning of `/proc/self/mountinfo`"
-weight: 70
-status: completed
-categories:
-  - mounts
-commit: "0f46d81f2bce"
----
-
-[x] Mount notifications without rescanning of `/proc/self/mountinfo`
-
-Mount notifications that do not require continuous rescanning of
-`/proc/self/mountinfo`. Currently, if a program wants to track
-mounts established on the system it can receive `poll()`able
-events via a file descriptor to `/proc/self/mountinfo`. When
-receiving them it needs to rescan the file from the top and
-compare it with the previous scan. This is both slow and
-racy. It's slow on systems with a large number of mounts as the
-cost for re-scanning the table has to be paid for every change to
-the mount table. It's racy because quickly added and removed
-mounts might not be noticed.
-
-**🙇 `0f46d81f2bce ("fanotify: notify on mount attach and detach")` 🙇**
-
-**Use-Case:** `systemd` tracks the mount table to integrate the mounts
-into its own dependency management.
diff --git a/website/content/completed/namespace-binfmt-misc-filesystem.md b/website/content/completed/namespace-binfmt-misc-filesystem.md
deleted file mode 100644
index 39c2ef6..0000000
--- a/website/content/completed/namespace-binfmt-misc-filesystem.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "Namespace `binfmt_misc` filesystem"
-weight: 160
-status: completed
-categories:
-  - filesystems
-  - mounts
-  - namespaces
-commit: "21ca59b365c0"
----
-
-[x] Make the `binfmt_misc` filesystem namespaced.
-
-**🙇 `21ca59b365c0 ("binfmt_misc: enable sandboxed mounts")` 🙇**
-
-**Use-Case:** Allow containers and sandboxes to register their own binfmt
-handlers.
diff --git a/website/content/completed/namespace-ioctl-to-translate-a-pid-between-pid-namespaces.md b/website/content/completed/namespace-ioctl-to-translate-a-pid-between-pid-namespaces.md
deleted file mode 100644
index a5f31ca..0000000
--- a/website/content/completed/namespace-ioctl-to-translate-a-pid-between-pid-namespaces.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "Namespace ioctl to translate a PID between PID namespaces"
-weight: 10
-status: completed
-categories:
-  - namespaces
-  - processes
-commit: "ca567df74a28"
----
-
-[x] Namespace ioctl to translate a PID between PID namespaces
-
-**🙇 `ca567df74a28a9fb368c6b2d93e864113f73f5c2 ("nsfs: add pid translation ioctls")` 🙇**
-
-**Use-Case:** This makes it possible to e.g., figure out what a given PID in
-a PID namespace corresponds to in the caller's PID namespace. For example, to
-figure out what the PID of PID 1 inside of a given PID namespace is.
diff --git a/website/content/completed/require-a-user-namespace-to-have-an-idmapping-when-attached.md b/website/content/completed/require-a-user-namespace-to-have-an-idmapping-when-attached.md
deleted file mode 100644
index 093a850..0000000
--- a/website/content/completed/require-a-user-namespace-to-have-an-idmapping-when-attached.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "Require a user namespace to have an idmapping when attached"
-weight: 60
-status: completed
-categories:
-  - mounts
-  - namespaces
-commit: "dacfd001eaf2"
----
-
-[x] Require a user namespace to have an idmapping when attached
-
-Enforce that the user namespace about to be attached to a mount must
-have an idmapping written.
-
-**🙇 `dacfd001eaf2 ("fs/mnt_idmapping.c: Return -EINVAL when no map is written")` 🙇**
-
-**Use-Case:** Tighten the semantics.
diff --git a/website/content/completed/scm-pidfd-auxiliary-socket-message.md b/website/content/completed/scm-pidfd-auxiliary-socket-message.md
deleted file mode 100644
index 7878f02..0000000
--- a/website/content/completed/scm-pidfd-auxiliary-socket-message.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-title: "`SCM_PIDFD` auxiliary socket message"
-weight: 100
-status: completed
-categories:
-  - pidfd
-  - processes
-  - sockets
-commit: "5e2ff6704a27"
----
-
-[x] `SCM_PIDFD` or similar auxiliary socket message, that is a modern
-version of the `SCM_CREDS` message's `.pid` field, and provides a
-`pidfd` file descriptor to the originating peer process.
-
-**🙇 `5e2ff6704a275be00 ("scm: add SO_PASSPIDFD and SCM_PIDFD")` 🙇**
-
-**Use-Case:** security infrastructure (such as PolicyKit) can safely
-reference clients this way without fearing PID
-recycling. `systemd-journald` can acquire peer metadata this way in
-a less racy fashion, in particular safe against PID recycling.
diff --git a/website/content/completed/security-hook-for-create-user-ns.md b/website/content/completed/security-hook-for-create-user-ns.md
deleted file mode 100644
index acbb47d..0000000
--- a/website/content/completed/security-hook-for-create-user-ns.md
+++ /dev/null
@@ -1,15 +0,0 @@
----
-title: "Security hook for `create_user_ns()`"
-weight: 130
-status: completed
-categories:
-  - namespaces
-  - security
-commit: "7cd4c5c2101c"
----
-
-[x] (kAPI) Add security hook to `create_user_ns()`.
-
-**🙇 `7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()")` 🙇**
-
-**Use-Case:** Allow LSMs to monitor user namespace creation.
diff --git a/website/content/completed/set-comm-field-before-exec.md b/website/content/completed/set-comm-field-before-exec.md
deleted file mode 100644
index 8f1696a..0000000
--- a/website/content/completed/set-comm-field-before-exec.md
+++ /dev/null
@@ -1,35 +0,0 @@
----
-title: "Set `comm` field before `exec()`"
-weight: 30
-status: completed
-categories:
-  - processes
-commit: "543841d18060"
----
-
-[x] Set `comm` field before `exec()`
-
-There should be a way to control the process' `comm` field if
-started via `fexecve()`/`execveat()`.
-
-Right now, when `fexecve()`/`execveat()` is used, the `comm` field
-(i.e. `/proc/self/comm`) contains a name derived from the numeric fd,
-which breaks `ps -C …` and various other tools.  In particular when
-the fd was opened with `O_CLOEXEC`, the number of the fd in the old
-process is completely meaningless.
-
-The goal is to add a way to tell `fexecve()`/`execveat()` what name to use.
-
-Since `comm` is under user control anyway (via `PR_SET_NAME`), it
-should be safe to also make it somehow configurable at fexecve()
-time.
-
-See https://github.com/systemd/systemd/commit/35a926777e124ae8c2ac3cf46f44248b5e147294,
-https://github.com/systemd/systemd/commit/8939eeae528ef9b9ad2a21995279b76d382d5c81.
-
-**🙇 `543841d18060 ("exec: fix up /proc/pid/comm in the execveat(AT_EMPTY_PATH) case")` 🙇**
-
-**Use-Case:** In systemd we generally would prefer using `fexecve()`
-to safely and race-freely invoke processes, but the fact that `comm`
-is useless after invoking a process that way makes the call
-unfortunately hard to use for systemd.
diff --git a/website/content/completed/support-idmapped-mounts-for-overlayfs.md b/website/content/completed/support-idmapped-mounts-for-overlayfs.md
deleted file mode 100644
index 790c21e..0000000
--- a/website/content/completed/support-idmapped-mounts-for-overlayfs.md
+++ /dev/null
@@ -1,15 +0,0 @@
----
-title: "Support idmapped mounts for `overlayfs`"
-weight: 170
-status: completed
-categories:
-  - filesystems
-  - mounts
-commit: "bc70682a497c"
----
-
-[x] Support idmapped mounts for `overlayfs`
-
-**🙇 `bc70682a497c ("ovl: support idmapped layers")` 🙇**
-
-**Use-Case:** Allow containers to use `overlayfs` with idmapped mounts.
diff --git a/website/content/completed/take-ip-unicast-if-into-account-for-routing-decisions.md b/website/content/completed/take-ip-unicast-if-into-account-for-routing-decisions.md
deleted file mode 100644
index d5e2413..0000000
--- a/website/content/completed/take-ip-unicast-if-into-account-for-routing-decisions.md
+++ /dev/null
@@ -1,25 +0,0 @@
----
-title: "Take `IP_UNICAST_IF` into account for routing decisions"
-weight: 110
-status: completed
-categories:
-  - sockets
-commit: "0e4d354762ce"
----
-
-[x] `IP_UNICAST_IF` should be taken into account for routing decisions
-at UDP `connect()` time (currently it isn't, only `SO_BINDTOINDEX`
-is, but that does so much more than just that, and one often
-doesn't want that)
-
-**🙇 `0e4d354762cefd3e16b4cff8988ff276e45effc4 ("net-next: Fix
-IP_UNICAST_IF option behavior for connected sockets")` 🙇**
-
-**Use-Case:** DNS resolvers that associate DNS configuration with
-specific network interfaces (example: `systemd-resolved`) typically
-want to preferably route DNS traffic to the per-interface DNS
-server via that interface, but not make further restrictions on the
-origins or received replies, and all that without
-privileges. `IP_UNICAST_IF` fulfills this role fine for TCP, but
-for UDP it is not taken into account for the `connect()` routing
-decision.
diff --git a/website/content/completed/unmounting-of-obstructed-mounts.md b/website/content/completed/unmounting-of-obstructed-mounts.md
deleted file mode 100644
index a349679..0000000
--- a/website/content/completed/unmounting-of-obstructed-mounts.md
+++ /dev/null
@@ -1,24 +0,0 @@
----
-title: "Unmounting of obstructed mounts"
-weight: 90
-status: completed
-categories:
-  - filesystems
-  - mounts
-commit: "6ac392815628"
----
-
-[x] ability to unmount obstructed mounts. (this means: you have a stack
-of mounts on the very same inode, and you want to remove a mount in
-the middle. right now, you can only remove the topmost mount.)
-
-**🙇 instead of the ability to unmount obstructed mounts we gained
-the ability to mount beneath an existing mount, with mostly
-equivalent outcome. `6ac392815628f317fcfdca1a39df00b9cc4ebc8b
-("fs: allow to mount beneath top mount")` 🙇**
-
-**use-case:** this is useful for replacing mounts atomically, for
-example for upgrading versioned disk images: first an old version
-of the image is mounted. then a new version is mounted over the
-existing mount point, and then the lower mount point is
-removed. One such software would be `systemd-sysext`.
diff --git a/website/content/in-progress/_index.md b/website/content/in-progress/_index.md
deleted file mode 100644
index 4c98074..0000000
--- a/website/content/in-progress/_index.md
+++ /dev/null
@@ -1,9 +0,0 @@
----
-title: "In Progress"
-weight: 1
-bookCollapseSection: true
----
-
-Features currently being designed or implemented by contributors.
-
-If you are working on one of these, please add your GitHub handle or email address so we can coordinate.
diff --git a/website/content/in-progress/ability-to-put-user-xattrs-on-s-ifsock-socket-entrypoint-inodes-in-the-file-system.md b/website/content/in-progress/ability-to-put-user-xattrs-on-s-ifsock-socket-entrypoint-inodes-in-the-file-system.md
deleted file mode 100644
index 8bdf205..0000000
--- a/website/content/in-progress/ability-to-put-user-xattrs-on-s-ifsock-socket-entrypoint-inodes-in-the-file-system.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-title: "Ability to put user xattrs on `S_IFSOCK` socket entrypoint inodes in the file system"
-weight: 20
-status: in-progress
-categories:
-  - filesystems
-  - mounts
-  - namespaces
-  - sockets
----
-
-Currently, the kernel only allows extended attributes in the
-`user.*` namespace to be attached to directory and regular file
-inodes. It would be tremendously useful to allow them to be
-associated with socket inodes, too.
-
-**Use-Case:** There are two syslog RFCs in use today: RFC3164 and
-RFC5424. `glibc`'s `syslog()` API generates events close to the
-former, but there are programs which would like to generate the
-latter instead (as it supports structured logging). The two formats
-are not backwards compatible: a client sending RFC5424 messages to a
-server only understanding RFC3164 will cause an ugly mess. On Linux
-there's only a single `/dev/log` AF_UNIX/SOCK_DGRAM socket backing
-`syslog()`, which is used in a one-way, fire-and-forget style. This
-means that feature negotiation is not really possible within the
-protocol. Various tools bind mount the socket inode into `chroot()`
-and container environments, hence it would be fantastic to associate
-supported feature information directly with the inode (and thus
-outside of the protocol) to make it easy for clients to determine
-which features are spoken on a socket, in a way that survives bind
-mounts. Implementation idea would be that syslog daemons
-implementing RFC5424 could simply set an xattr `user.rfc5424` to `1`
-(or something like that) on the socket inode, and clearly inform
-clients in a natural and simple way that they'd be happy to parse
-the newer format. Also see:
-https://github.com/systemd/systemd/issues/19251 – This idea could
-also be extended to other sockets and other protocols: by setting
-some extended attribute on a socket inode, services could advertise
-which protocols they support on them. For example D-Bus sockets
-could carry `user.dbus` set to `1`, and Varlink sockets
-`user.varlink` set to `1` and so on.
diff --git a/website/content/in-progress/add-immutable-rootfs-nullfs.md b/website/content/in-progress/add-immutable-rootfs-nullfs.md
deleted file mode 100644
index 9ec9588..0000000
--- a/website/content/in-progress/add-immutable-rootfs-nullfs.md
+++ /dev/null
@@ -1,37 +0,0 @@
----
-title: "Add immutable rootfs (`nullfs`)"
-weight: 50
-status: in-progress
-categories:
-  - filesystems
-  - mounts
-  - namespaces
-  - processes
-  - security
----
-
-Currently `pivot_root()` doesn't work on the real rootfs because it
-cannot be unmounted. Userspace has to do a recursive removal of the
-initramfs contents manually before continuing the boot.
-
-Add an immutable rootfs called `nullfs` that serves as the parent mount
-for anything that is actually useful such as the tmpfs or ramfs for
-initramfs unpacking or the rootfs itself. The kernel mounts a
-tmpfs/ramfs on top of it, unpacks the initramfs and fires up userspace
-which mounts the rootfs and can then simply do:
-
-```c
-chdir(rootfs);
-pivot_root(".", ".");
-umount2(".", MNT_DETACH);
-```
-
-This also means that the rootfs mount in unprivileged namespaces doesn't
-need to become `MNT_LOCKED` anymore as it's guaranteed that the
-immutable rootfs remains permanently empty so there cannot be anything
-revealed by unmounting the covering mount.
-
-**Use-Case:** Simplifies the boot process by enabling `pivot_root()` to
-work directly on the real rootfs. Removes the need for traditional
-`switch_root` workarounds. In the future this also allows us to create
-completely empty mount namespaces without risking to leak anything.
diff --git a/website/content/in-progress/allow-move-mount-beneath-on-the-rootfs.md b/website/content/in-progress/allow-move-mount-beneath-on-the-rootfs.md
deleted file mode 100644
index 6565eed..0000000
--- a/website/content/in-progress/allow-move-mount-beneath-on-the-rootfs.md
+++ /dev/null
@@ -1,48 +0,0 @@
----
-title: "Allow `MOVE_MOUNT_BENEATH` on the rootfs"
-weight: 60
-status: in-progress
-categories:
-  - filesystems
-  - mounts
-  - namespaces
-  - processes
----
-
-Allow `MOVE_MOUNT_BENEATH` to target the caller's rootfs, enabling
-root-switching without `pivot_root(2)`. The traditional approach to
-switching the rootfs involves `pivot_root(2)` or a `chroot_fs_refs()`-based
-mechanism that atomically updates `fs->root` for all tasks sharing the
-same `fs_struct`. This has consequences for `fork()`, `unshare(CLONE_FS)`,
-and `setns()`.
-
-Instead, decompose root-switching into individually atomic, locally-scoped
-steps:
-
-```c
-fd_tree = open_tree(-EBADF, "/newroot",
-                    OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
-fchdir(fd_tree);
-move_mount(fd_tree, "", AT_FDCWD, "/",
-           MOVE_MOUNT_BENEATH | MOVE_MOUNT_F_EMPTY_PATH);
-chroot(".");
-umount2(".", MNT_DETACH);
-```
-
-Since each step only modifies the caller's own state, the
-`fork()`/`unshare()`/`setns()` races are eliminated by design.
-
-To make this work, `MNT_LOCKED` is transferred from the top mount to the
-mount beneath. The new mount takes over the job of protecting the parent
-mount from being revealed. This also makes it possible to safely modify
-an inherited mount table after `unshare(CLONE_NEWUSER | CLONE_NEWNS)`:
-
-```sh
-mount --beneath -t tmpfs tmpfs /proc
-umount -l /proc
-```
-
-**Use-Case:** Containers created with `unshare(CLONE_NEWUSER | CLONE_NEWNS)`
-can reshuffle an inherited mount table safely. `MOVE_MOUNT_BENEATH` on the
-rootfs makes it possible to switch out the rootfs without the costly
-`pivot_root(2)` and without cross-namespace vulnerabilities.
diff --git a/website/content/in-progress/create-empty-mount-namespaces-via-unshare-unshare-empty-mntns-and-clone3-clone-empty-mntns.md b/website/content/in-progress/create-empty-mount-namespaces-via-unshare-unshare-empty-mntns-and-clone3-clone-empty-mntns.md
deleted file mode 100644
index 136da23..0000000
--- a/website/content/in-progress/create-empty-mount-namespaces-via-unshare-unshare-empty-mntns-and-clone3-clone-empty-mntns.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "Create empty mount namespaces via `unshare(UNSHARE_EMPTY_MNTNS)` and `clone3(CLONE_EMPTY_MNTNS)`"
-weight: 10
-status: in-progress
-categories:
-  - filesystems
-  - mounts
-  - namespaces
-  - processes
----
-
-Now that we have support for `nullfs` it is trivial to allow the
-creation of completely empty mount namespaces, i.e., mount namespaces
-that only have the `nullfs` mount located at its root.
-
-**Use-Case:** This allows isolating tasks in completely empty mount
-namespaces. It also allows the caller to avoid copying its current mount
-table which is useless in the majority of container workload cases.
diff --git a/website/content/in-progress/create-mount-namespace-with-custom-rootfs-via-open-tree-and-fsmount.md b/website/content/in-progress/create-mount-namespace-with-custom-rootfs-via-open-tree-and-fsmount.md
deleted file mode 100644
index 39b182f..0000000
--- a/website/content/in-progress/create-mount-namespace-with-custom-rootfs-via-open-tree-and-fsmount.md
+++ /dev/null
@@ -1,39 +0,0 @@
----
-title: "Create mount namespace with custom rootfs via `open_tree()` and `fsmount()`"
-weight: 40
-status: in-progress
-categories:
-  - mounts
-  - namespaces
-  - processes
----
-
-Add `OPEN_TREE_NAMESPACE` flag to `open_tree()` and `FSMOUNT_NAMESPACE` flag
-to `fsmount()` that create a new mount namespace with the specified mount tree
-as the rootfs mounted on top of a copy of the real rootfs. These return a
-namespace file descriptor instead of a mount file descriptor.
-
-This allows `OPEN_TREE_NAMESPACE` to function as a combined
-`unshare(CLONE_NEWNS)` and `pivot_root()`.
-
-When creating containers the setup usually involves using `CLONE_NEWNS` via
-`clone3()` or `unshare()`. This copies the caller's complete mount namespace.
-The runtime will also assemble a new rootfs and then use `pivot_root()` to
-switch the old mount tree with the new rootfs. Afterward it will recursively
-unmount the old mount tree thereby getting rid of all mounts.
-
-Copying all of these mounts only to get rid of them later is wasteful. With a
-large mount table and a system where thousands of containers are spawned in
-parallel this quickly becomes a bottleneck increasing contention on the
-semaphore.
-
-**Use-Case:** Container runtimes can create an extremely minimal rootfs
-directly:
-
-```c
-fd_mntns = open_tree(-EBADF, "/var/lib/containers/wootwoot", OPEN_TREE_NAMESPACE);
-```
-
-This creates a mount namespace where "wootwoot" has become the rootfs. The
-caller can `setns()` into this new mount namespace and assemble additional
-mounts without copying and destroying the entire parent mount table.
diff --git a/website/content/in-progress/query-mount-information-via-file-descriptor-with-statmount.md b/website/content/in-progress/query-mount-information-via-file-descriptor-with-statmount.md
deleted file mode 100644
index 426e105..0000000
--- a/website/content/in-progress/query-mount-information-via-file-descriptor-with-statmount.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-title: "Query mount information via file descriptor with `statmount()`"
-weight: 70
-status: in-progress
-categories:
-  - mounts
-  - namespaces
----
-
-Extend `struct mnt_id_req` to accept a file descriptor and introduce
-`STATMOUNT_BY_FD` flag. When a valid fd is provided and `STATMOUNT_BY_FD`
-is set, `statmount()` returns mount info about the mount the fd is on.
-
-This works even for "unmounted" mounts (mounts that have been unmounted using
-`umount2(mnt, MNT_DETACH)`), if you have access to a file descriptor on that
-mount. These unmounted mounts will have no mountpoint and no valid mount
-namespace, so `STATMOUNT_MNT_POINT` and `STATMOUNT_MNT_NS_ID` are unset in
-`statmount.mask` for such mounts.
-
-**Use-Case:** Query mount information directly from a file descriptor without
-needing the mount ID, which is particularly useful for detached or unmounted
-mounts.
diff --git a/website/content/in-progress/support-detached-mounts-with-pivot-root.md b/website/content/in-progress/support-detached-mounts-with-pivot-root.md
deleted file mode 100644
index 8234003..0000000
--- a/website/content/in-progress/support-detached-mounts-with-pivot-root.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-title: "Support detached mounts with `pivot_root()`"
-weight: 30
-status: in-progress
-categories:
-  - filesystems
-  - mounts
----
-
-The new rootfs must currently refer to an attached mount. This restriction
-seems unnecessary. We should allow the new rootfs to refer to a detached
-mount.
-
-This will allow a service- or container manager to create a new rootfs as
-a detached, private mount that isn't exposed anywhere in the filesystem and
-then `pivot_root()` into it.
-
-Since `pivot_root()` only takes path arguments the new rootfs would need to
-be passed via `/proc/<pid>/fd/<nr>`. In the long run we should add a new
-`pivot_root()` syscall operating on file descriptors instead of paths.
diff --git a/website/content/wishlist/_index.md b/website/content/wishlist/_index.md
deleted file mode 100644
index 42f588d..0000000
--- a/website/content/wishlist/_index.md
+++ /dev/null
@@ -1,9 +0,0 @@
----
-title: "Wishlist"
-weight: 2
-bookCollapseSection: true
----
-
-Ideas and proposals for kernel features. Pick one up and start working on it!
-
-To claim an item, open a PR moving it to the **In Progress** section and add your GitHub handle or email address.
diff --git a/website/content/wishlist/a-timeout-for-the-flock-syscall.md b/website/content/wishlist/a-timeout-for-the-flock-syscall.md
deleted file mode 100644
index 9dda284..0000000
--- a/website/content/wishlist/a-timeout-for-the-flock-syscall.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-title: "A timeout for the `flock()` syscall"
-weight: 150
-status: wishlist
-categories:
-  - block-devices
-  - processes
----
-
-A timeout for the `flock()` syscall. Faking the time-out in userspace
-is nasty: most code does it with `alarm()` (or equivalent APIs), but
-that's racy since on a heavily loaded system the timeout might trigger
-before the `flock()` call is entered, in particular if short time-outs
-shall be used. More accurate is to do the locking in a short-lived
-child process, but that's difficult already in C, and almost
-impossible in languages that do not allow `fork()` without `execve()`.
-
-**Use-Case:** as mentioned above systemd-udev allows synchronizing
-block device probing via flock(). Often userspace wants to wait
-for that, but without risking to hang forever.
diff --git a/website/content/wishlist/ability-to-only-open-regular-files.md b/website/content/wishlist/ability-to-only-open-regular-files.md
deleted file mode 100644
index 8b06f41..0000000
--- a/website/content/wishlist/ability-to-only-open-regular-files.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-title: "Ability to only open regular files"
-weight: 60
-status: wishlist
-categories:
-  - mounts
----
-
-`O_REGULAR` (inspired by the existing `O_DIRECTORY` flag for
-`open()`), which opens a file only if it is of type `S_IFREG`.
-
-**Use-Case:** this would be very useful to write secure programs
-that want to avoid being tricked into opening device nodes with
-special semantics while thinking they operate on regular
-files. This is particularly relevant as many device nodes (or even
-FIFOs) come with blocking I/O (or even blocking `open()`!) by
-default, which is not expected from regular files backed by "fast"
-disk I/O. Consider implementation of a naive web browser which is
-pointed to `file://dev/zero`, not expecting an endless amount of
-data to read.
diff --git a/website/content/wishlist/ability-to-reopen-a-struct-block-device.md b/website/content/wishlist/ability-to-reopen-a-struct-block-device.md
deleted file mode 100644
index fa48b6c..0000000
--- a/website/content/wishlist/ability-to-reopen-a-struct-block-device.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "Ability to reopen a `struct block_device`"
-weight: 310
-status: wishlist
-categories:
-  - filesystems
-  - mounts
----
-
-Add ability to reopen a `struct block_device`. This would allow using
-`blkdev_get_by_path()`/`blkdev_get_{part,whole}()` to claim a device
-with `BLK_OPEN_READ` and later on reopen with
-`BLK_OPEN_READ | BLK_OPEN_WRITE`. This in turn would allow opening block
-devices at `fsconfig(FS_CONFIG_SET_*)` time and then at `fill_super()`
-time we would be able to reopen in case `!(fc->sb_flags & SB_RDONLY)`.
-Overall this has the effect that we're able to open devices early
-giving the user early errors when they set mount options rather than
-very late when the superblock is created.
diff --git a/website/content/wishlist/add-mount-setattr-locked-flag-to-mount-setattr.md b/website/content/wishlist/add-mount-setattr-locked-flag-to-mount-setattr.md
deleted file mode 100644
index 5443701..0000000
--- a/website/content/wishlist/add-mount-setattr-locked-flag-to-mount-setattr.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-title: "Add `MOUNT_SETATTR_LOCKED` flag to `mount_setattr()`"
-weight: 190
-status: wishlist
-categories:
-  - mounts
-  - namespaces
-  - processes
-  - security
----
-
-Add a new `MOUNT_SETATTR_LOCKED` flag to `mount_setattr(..., ..., MOUNT_SETATTR_LOCKED, ..., ...)`.
-The `MOUNT_SETATTR_LOCKED` flag allows a `ns_capable(mntns->user_ns,
-CAP_SYS_ADMIN)` caller to lock all mount properties. The mount properties
-cannot be changed anymore.
-
-**Use-Case:** allowing processes to lock mount properties even for
-privileged processes. Locking mount properties would currently involve
-having to have the mount namespace of the container be owned by an ancestor
-user namespace. But this doesn't just lock a single mount or mount subtree;
-it locks all mounts in the mount namespace, i.e., the mount table cannot be
-altered.
diff --git a/website/content/wishlist/add-process-by-pidfd-to-a-cgroup.md b/website/content/wishlist/add-process-by-pidfd-to-a-cgroup.md
deleted file mode 100644
index 1c99f11..0000000
--- a/website/content/wishlist/add-process-by-pidfd-to-a-cgroup.md
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Add process by PIDFD to a cgroup"
-weight: 370
-status: wishlist
-categories:
-  - cgroups
-  - pidfd
-  - processes
-  - sockets
----
-
-At the moment the canonical way to add a process to a cgroup is by
-echoing its PID into the `cgroup.procs` attribute in the target
-cgroupfs directory of the cgroup. This is safe as long as the
-process doing so just forked off the process it wants to migrate and
-hence can control that it hasn't been reaped yet, and hence
-guarantees the PID is valid. This is racy however if "foreign"
-processes shall be moved into the cgroup.
-
-**Use-Case:** In systemd, all user sessions are wrapped in scope
-units which are backed by a cgroup. The session processes moved into
-the scope unit are typically "foreign" processes, i.e. not children
-of the service manager, hence doing the movement is subject to races
-in case the process dies and its PID is quickly recycled. (This
-assumes systemd can acquire a pidfd of the foreign process without
-races, for example via `SCM_PIDFD` and `SO_PEERPIDFD` or similar.)
diff --git a/website/content/wishlist/asynchronous-close.md b/website/content/wishlist/asynchronous-close.md
deleted file mode 100644
index c451f23..0000000
--- a/website/content/wishlist/asynchronous-close.md
+++ /dev/null
@@ -1,29 +0,0 @@
----
-title: "Asynchronous `close()`"
-weight: 120
-status: wishlist
-categories:
-  - mounts
-  - processes
-  - sockets
----
-
-An asynchronous or forced `close()`, that guarantees that
-userspace doesn't have to risk blocking for longer periods of time
-when trying to get rid of unwanted file descriptors, possibly
-received via `recvmsg()` + `SCM_RIGHTS` (see above). Currently,
-`close()` of various file descriptors (for example those referring
-to slow storage, e.g. non-responding NFS servers and such) might
-take arbitrary amounts of time, potentially into the minute range
-and more. This makes it risky accepting file descriptors on
-publicly accessible `AF_UNIX` sockets, the way IPC brokers
-(e.g. D-Bus) do it: if a rogue client keeps sending file
-descriptors that, because unexpected, must be closed immediately, it
-might cause the receiving process to effectively crawl, when it is
-busy closing them all. A special form of `close()` that simply
-detaches a file descriptor from the file descriptor table without
-blocking on IO in any form would be great to close this issue.
-
-**Use-Case:** any program that receives file descriptors via `AF_UNIX`
-from untrusted clients would benefit from this. e.g. D-Bus
-brokers.
diff --git a/website/content/wishlist/at-empty-path-support-for-openat-and-openat2.md b/website/content/wishlist/at-empty-path-support-for-openat-and-openat2.md
deleted file mode 100644
index 2048e2e..0000000
--- a/website/content/wishlist/at-empty-path-support-for-openat-and-openat2.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "`AT_EMPTY_PATH` support for `openat()` and `openat2()`"
-weight: 420
-status: wishlist
-categories:
-  - processes
----
-
-To get an operable version of an `O_PATH` file descriptor, it is
-possible to use `openat(fd, ".", O_DIRECTORY)` for directories, but
-other files currently require going through
-`open("/proc/<pid>/fd/<nr>")` which depends on a functioning `procfs`.
-
-FreeBSD already has `O_EMPTY_PATH` for `openat`, while `fstatat` and
-similar functions have `AT_EMPTY_PATH`.
-
-**Use-Case:** When dealing with `O_PATH` file descriptors, allow
-re-opening an operable version without the need of `procfs`.
diff --git a/website/content/wishlist/at-empty-path-support-for-unlinkat.md b/website/content/wishlist/at-empty-path-support-for-unlinkat.md
deleted file mode 100644
index 0e22b00..0000000
--- a/website/content/wishlist/at-empty-path-support-for-unlinkat.md
+++ /dev/null
@@ -1,11 +0,0 @@
----
-title: "`AT_EMPTY_PATH` support for `unlinkat()`"
-weight: 290
-status: wishlist
-categories:
-  - processes
----
-
-**Use-Case:** When dealing with files/directories, allow passing
-around only a file descriptor without having to keep the path around
-to be able to unlink the file/directory.
diff --git a/website/content/wishlist/automatic-growing-of-btrfs-filesystems.md b/website/content/wishlist/automatic-growing-of-btrfs-filesystems.md
deleted file mode 100644
index 9201579..0000000
--- a/website/content/wishlist/automatic-growing-of-btrfs-filesystems.md
+++ /dev/null
@@ -1,30 +0,0 @@
----
-title: "Automatic growing of `btrfs` filesystems"
-weight: 360
-status: wishlist
-categories:
-  - block-devices
-  - filesystems
----
-
-An *auto-grow* feature in `btrfs` would be excellent.
-
-If such a mode is enabled, `btrfs` would automatically grow a file
-system up to the size of its backing block devices. Example: btrfs
-is created with 200M in size on a block device 2G in size. Once the
-file system is filled up fully, `btrfs` would automatically grow the
-file system as needed in the increments it needs, up to the 2G that
-the backing block device is in size.
-
-**Use-Case:** This would allow creating minimal, compact file
-systems: just create them small on a sparse block device, and copy
-files into it, as needed, create subvolumes and whatever else is
-desired. As long as only files are created and written (but not
-modified) the resulting fs should be automatically minimal in size.
-This would specifically be useful in `systemd-homed`, which
-maintains per-user `btrfs` file systems backed by block
-devices. Currently, `homed` grows the file systems manually on login
-and then shrinks them again on logout, but this is less than ideal,
-since btrfs places files all over the backing store, and thus the
-shrinking will generate a lot of nonsensical IO that could be
-reduced if the file system was always kept minimal in size anyway.
diff --git a/website/content/wishlist/auxiliary-socket-message-describing-the-sender-s-cgroup.md b/website/content/wishlist/auxiliary-socket-message-describing-the-sender-s-cgroup.md
deleted file mode 100644
index a6800cf..0000000
--- a/website/content/wishlist/auxiliary-socket-message-describing-the-sender-s-cgroup.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-title: "Auxiliary socket message describing the sender's cgroup"
-weight: 40
-status: wishlist
-categories:
-  - cgroups
-  - sockets
----
-
-`SCM_CGROUPID` or a similar auxiliary socket message, that allows
-receivers to figure out which cgroup a sender is part of.
-
-**Use-Case:** `systemd-journald` picks up cgroup information from
-logging clients, in order to augment log records and allow
-filtering via this meta-information. In particular it derives
-service identity from that (so that requests such as "Show me all
-log messages of service X!" can be answered). This is currently
-racy, since it uses `SCM_CREDS`' `.pid` field for this, which it then
-uses to load `/proc/$PID/cgroup`. In particular for programs that
-log and immediately exit, the cgroup information frequently cannot
-be acquired anymore by `systemd-journald`.
diff --git a/website/content/wishlist/blobfs.md b/website/content/wishlist/blobfs.md
deleted file mode 100644
index 810be6f..0000000
--- a/website/content/wishlist/blobfs.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-title: "blobfs"
-weight: 260
-status: wishlist
-categories:
-  - filesystems
----
-
-[`blobfs`](https://fuchsia.dev/fuchsia-src/concepts/filesystems/blobfs)
-for Linux. i.e. a minimalistic file system, that can store
-authenticated (Verity) data files, that can be written once, and
-not be modified after that, and provide stable handles (i.e. is
-content-addressable) to them.
-
-**Use-Case:** This would deliver just about enough to place
-trusted OS resources (binaries, kernels, initrds, fs trees, other
-resources) in them, without having to trust the medium and IO
-underneath. Should be simple enough to even implement in a boot
-loader and similar, without making things vulnerable to rogue file
-system image attacks. The OS and its payloads (apps, containers,
-…) could then be composed from these resources, through means like
-overlayfs, namespacing and more.
diff --git a/website/content/wishlist/clock-monotonic-network-timestamps.md b/website/content/wishlist/clock-monotonic-network-timestamps.md
deleted file mode 100644
index 12db0ec..0000000
--- a/website/content/wishlist/clock-monotonic-network-timestamps.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-title: "`CLOCK_MONOTONIC` network timestamps"
-weight: 130
-status: wishlist
-categories:
-  - sockets
----
-
-Currently network timestamps are exclusively in `CLOCK_REALTIME`, even
-though for many (most?) use cases a monotonic clock would be much preferable, as
-calculations become easier when one doesn't have to think about clock
-jumps and similar.
-
-**Use-Case:** `systemd-journald` collects implicit timestamps via
-`AF_UNIX` time-stamping, in `CLOCK_REALTIME`, even though for its
-internal logic only monotonic timestamps are used, as log records
-are searched via bisection in ordered tables, that require
-strictly increasing timestamps. In particular during boot (where
-`CLOCK_REALTIME` is often not available, stable or subject to
-corrections) it would be good to have reliable, monotonic
-timestamps on all log records.
diff --git a/website/content/wishlist/clone-pidfd-autokill-semantics-for-pid-1.md b/website/content/wishlist/clone-pidfd-autokill-semantics-for-pid-1.md
deleted file mode 100644
index bdba0b0..0000000
--- a/website/content/wishlist/clone-pidfd-autokill-semantics-for-pid-1.md
+++ /dev/null
@@ -1,37 +0,0 @@
----
-title: "`CLONE_PIDFD_AUTOKILL` semantics for PID 1"
-weight: 20
-status: wishlist
-categories:
-  - cgroups
-  - pidfd
-  - processes
----
-
-Allow obtaining a `CLONE_PIDFD_AUTOKILL` pidfd for PID 1. Currently
-PID 1 cannot hand off an autokill pidfd for itself. Allowing this would
-make it possible to create system-death-traps where the lifetime of
-PID 1 is tied to another process. PID 1 creates a `CLONE_PIDFD_AUTOKILL`
-pidfd for itself, hands it off to another task, and closes its own copy.
-If that other task exits, PID 1 is taken down.
-
-**Use-Case:** Tie the lifetime of PID 1 to a critical process such as a
-software TPM or other security-sensitive daemon. This ensures the system
-is brought down if the critical process dies, rather than continuing to
-run in a potentially compromised state.
-
-**Considerations:** When PID 1 is spawned there is no mechanism to start
-it with a pidfd right away. There are two possible approaches:
-
-1. Place a pidfd at file descriptor position 3 in PID 1's file descriptor
-   table before `exec()`, similar to how the coredump usermodehelper works.
-   After `exec()` PID 1 knows that it already has an autokill pidfd for
-   itself opened at fd 3.
-
-2. Allow opening an autokill pidfd via `pidfd_open()`. This would require
-   mutual exclusion with `CLONE_PIDFD_AUTOKILL`: if an autokill pidfd
-   already exists from `clone3()` then no new autokill pidfd can be
-   created via `pidfd_open()`. This guarantees clean semantics.
-
-Permission checking would have to be strict. It should probably only be
-allowed for the current thread-group leader on itself.
diff --git a/website/content/wishlist/determining-if-a-mount-point-belongs-to-the-current-user.md b/website/content/wishlist/determining-if-a-mount-point-belongs-to-the-current-user.md
deleted file mode 100644
index 421a25a..0000000
--- a/website/content/wishlist/determining-if-a-mount-point-belongs-to-the-current-user.md
+++ /dev/null
@@ -1,27 +0,0 @@
----
-title: "Determining if a mount point belongs to the current user"
-weight: 80
-status: wishlist
-categories:
-  - mounts
-  - namespaces
-  - processes
----
-
-Ability to determine if a mount point belongs to the current user
-namespace, in order to check if there's a chance a process can
-safely unmount it (as that only works for mounts owned by the same
-user namespaces — or one further down the tree, but not any up the
-tree). A simple, additional field in `/proc/self/mountinfo`
-containing the owning user namespace ID would probably already
-suffice.
-
-**Use-Case:** the `systemd` system and service manager tries to unmount
-all established mounts on shutdown. Inside of container
-environments where specific mounts are established by the
-container manager (and not the payload itself) this will
-ultimately fail if user namespaces are enabled. In order to clean
-up the shutdown logic it would be very good to be able to
-determine whether a specific mount could even possibly be
-unmounted or whether it's not worth the effort to include the
-unmount in the system shutdown transaction.
diff --git a/website/content/wishlist/device-cgroup-guard-to-allow-mknod-in-non-initial-userns.md b/website/content/wishlist/device-cgroup-guard-to-allow-mknod-in-non-initial-userns.md
deleted file mode 100644
index a940608..0000000
--- a/website/content/wishlist/device-cgroup-guard-to-allow-mknod-in-non-initial-userns.md
+++ /dev/null
@@ -1,28 +0,0 @@
----
-title: "Device cgroup guard to allow `mknod()` in non-initial userns"
-weight: 280
-status: wishlist
-categories:
-  - cgroups
-  - filesystems
-  - mounts
-  - namespaces
-  - security
----
-
-If a container manager restricts its unprivileged (user namespaced)
-children by a device cgroup, it is not necessary to deny `mknod()`
-anymore. Thus, user space applications may map devices on different
-locations in the file system by using `mknod()` inside the container.
-
-**Use-Case:** A use case for this, which is applied by users of GyroidOS,
-is to run `virsh` for VMs inside an unprivileged container. `virsh` or
-libvirt creates device nodes, e.g., `/var/run/libvirt/qemu/11-fgfg.dev/null`
-which currently fails in a non-initial userns, even if a cgroup device white
-list with the corresponding major, minor of `/dev/null` exists. Thus, in
-this case the usual bind mounts or pre-populated device nodes under `/dev`
-are not sufficient.
-
-An initial group internal RFC exists in
-(https://github.com/quitschbo/linux/tree/devcg_guard_rfc).
-See commit message for more implementation specific details.
diff --git a/website/content/wishlist/excluding-processes-watched-via-pidfd-from-waitid-p-all.md b/website/content/wishlist/excluding-processes-watched-via-pidfd-from-waitid-p-all.md
deleted file mode 100644
index 465ffaa..0000000
--- a/website/content/wishlist/excluding-processes-watched-via-pidfd-from-waitid-p-all.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-title: "Excluding processes watched via `pidfd` from `waitid(P_ALL, …)`"
-weight: 110
-status: wishlist
-categories:
-  - pidfd
-  - processes
----
-
-**Use-Case:** various programs use `waitid(P_ALL, …)` to collect exit
-information of exited child processes. In particular PID 1 and
-processes using `PR_SET_CHILD_SUBREAPER` use this as they may
-collect unexpected children that have been reparented from dying
-sub-processes, and that need to be reaped in order to clean up the
-PID space. Currently, these programs cannot easily mix waiting for
-specific sub-processes via `pidfd` with waiting for the other
-*unexpected* children via `waitid(P_ALL, …)` since the latter also
-reaps (and thus invalidates) the pidfd-tracked
-children. Specifically, the `systemd` service manager would like
-to use `pidfd`s to remove PID recycling security issues, but
-currently cannot as it also needs to generically wait for such
-unexpected children.
diff --git a/website/content/wishlist/extend-io-uring-with-classic-synchronous-system-calls.md b/website/content/wishlist/extend-io-uring-with-classic-synchronous-system-calls.md
deleted file mode 100644
index 6437812..0000000
--- a/website/content/wishlist/extend-io-uring-with-classic-synchronous-system-calls.md
+++ /dev/null
@@ -1,14 +0,0 @@
----
-title: "Extend `io_uring` with classic synchronous system calls"
-weight: 240
-status: wishlist
-categories:
-  - io-uring
-  - mounts
-  - namespaces
----
-
-The `io_uring` maintainers are open to adding classic existing synchronous
-system calls (e.g. `setns()` or `mount()` or other) to `io_uring`.
-They also said they would support adding new functionality into
-`io_uring` that is not exposed through system calls yet.
diff --git a/website/content/wishlist/extend-mount-setattr-to-allow-changing-mount-properties-ignoring-any-failures.md b/website/content/wishlist/extend-mount-setattr-to-allow-changing-mount-properties-ignoring-any-failures.md
deleted file mode 100644
index 5ad019f..0000000
--- a/website/content/wishlist/extend-mount-setattr-to-allow-changing-mount-properties-ignoring-any-failures.md
+++ /dev/null
@@ -1,12 +0,0 @@
----
-title: "Extend `mount_setattr()` to allow changing mount properties ignoring any failures"
-weight: 160
-status: wishlist
-categories:
-  - mounts
----
-
-**Use-Case:** workloads that know that there are mounts in a mount tree
-whose attributes cannot be changed by the caller don't want
-`mount_setattr()` to fail on the first mount it failed to convert. Give
-them a flag to request changes ignoring failures.
diff --git a/website/content/wishlist/extend-setns-to-allow-attaching-to-all-new-namespaces-of-a-process.md b/website/content/wishlist/extend-setns-to-allow-attaching-to-all-new-namespaces-of-a-process.md
deleted file mode 100644
index d65f6f4..0000000
--- a/website/content/wishlist/extend-setns-to-allow-attaching-to-all-new-namespaces-of-a-process.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "Extend `setns()` to allow attaching to all new namespaces of a process"
-weight: 200
-status: wishlist
-categories:
-  - namespaces
-  - pidfd
-  - processes
----
-
-Add an extension to `setns()`, e.g. `SETNS_PIDFD_ALL`, to allow attaching to
-all namespaces of a process that differ from the caller's namespaces.
-Currently specifying e.g., `CLONE_NEWUSER` fails if the caller is in the
-same user namespace as the target process. This is very inconvenient.
-
-**Use-Case:** Make it trivial to attach to all namespaces of a process
-without having to figure out whether the caller is already in the same
-namespace or not.
diff --git a/website/content/wishlist/filtering-on-received-file-descriptors.md b/website/content/wishlist/filtering-on-received-file-descriptors.md
deleted file mode 100644
index bf5c58b..0000000
--- a/website/content/wishlist/filtering-on-received-file-descriptors.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: "Filtering on received file descriptors"
-weight: 100
-status: wishlist
-categories:
-  - filesystems
-  - sockets
----
-
-An alternative to the previous item could be if some form of filtering
-could be enforced on the file descriptors suitable for enqueuing on
-the `AF_UNIX` socket. i.e. allow filtering by superblock type or
-similar, so that policies such as "only `memfd`s are OK to be
-received" may be expressed. (BPF?).
-
-**Use-Case:** as above.
diff --git a/website/content/wishlist/immutable-layers-for-overlayfs.md b/website/content/wishlist/immutable-layers-for-overlayfs.md
deleted file mode 100644
index 70fcdc6..0000000
--- a/website/content/wishlist/immutable-layers-for-overlayfs.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "Immutable layers for `overlayfs`"
-weight: 340
-status: wishlist
-categories:
-  - filesystems
----
-
-`overlayfs` should permit *immutable* layers, i.e. layers whose
-non-directory inodes may not be overridden in an upper writable
-layer.
-
-**Use-Case:** This would be useful when implementing `/etc/` as a
-stack of overlayfs layers, each shipping configuration for a
-different facet of the system, with a writable layer on the top for
-local modifications. In such a scenario it would be useful to allow
-the user to change any configuration it likes, except for the files
-and other inodes shipped in the lower layers.
diff --git a/website/content/wishlist/immutable-loopback-block-devices.md b/website/content/wishlist/immutable-loopback-block-devices.md
deleted file mode 100644
index 53ab1e4..0000000
--- a/website/content/wishlist/immutable-loopback-block-devices.md
+++ /dev/null
@@ -1,23 +0,0 @@
----
-title: "Immutable loopback block devices"
-weight: 140
-status: wishlist
-categories:
-  - block-devices
-  - filesystems
-  - mounts
----
-
-Truly immutable loopback block devices. Right now setting up a
-loopback block device in read-only mode, backed by a read-only
-file (stored on a regular read/write file system), and then
-mounting it with `ext4` also in `MS_RDONLY` mode *will* result in
-changes to the file, quite unexpectedly 🤯. Ideally, if a loopback
-block device is set up in read-only mode this should guarantee
-that the backing file remains unmodified by it.
-
-**Use-Case:** disk image build tools that want to reproducibly and
-verifiably build images must be able to rely that mounting them in
-read-only mode does not alter the images in any way. In particular
-when working in computer forensics one must be able to rely that
-file systems that are analyzed remain unmodified by the analysis.
diff --git a/website/content/wishlist/inotify-events-for-bsd-file-locks.md b/website/content/wishlist/inotify-events-for-bsd-file-locks.md
deleted file mode 100644
index f0e8634..0000000
--- a/website/content/wishlist/inotify-events-for-bsd-file-locks.md
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "inotify() events for BSD file locks"
-weight: 30
-status: wishlist
-categories:
-  - block-devices
-  - filesystems
----
-
-BSD file locks (i.e. `flock()`, as opposed to POSIX `F_SETLK` and
-friends) are inode-focussed, hence it would be great if one could get
-asynchronous notification when they are released via inotify.
-
-**Use-Case:** udevd probes block devices whenever they pop up to
-create /dev/disk/by-label/* and similar symlinks. Formatting tools
-can temporarily block this behaviour by taking a BSD file lock on
-the block device (as per https://systemd.io/BLOCK_DEVICE_LOCKING),
-in order to make sure udevd doesn't probe file systems/partition
-tables that are only partially initialized. Currently, udevd uses
-inotify `IN_CLOSE_WRITE` notifications to detect whenever
-applications close a block device after writing to it, and
-automatically reprobes the device. This works reasonably OK given
-that block devices are usually closed at the same time as their
-BSD file lock is released, and vice versa. However, this is not
-fully correct: what udevd actually should be watching is the locks
-being released, not the devices being closed.
diff --git a/website/content/wishlist/ioctl-api-for-overlayfs.md b/website/content/wishlist/ioctl-api-for-overlayfs.md
deleted file mode 100644
index f11a7c6..0000000
--- a/website/content/wishlist/ioctl-api-for-overlayfs.md
+++ /dev/null
@@ -1,23 +0,0 @@
----
-title: "`ioctl()` API for `overlayfs`"
-weight: 350
-status: wishlist
-categories:
-  - block-devices
-  - filesystems
----
-
-`overlayfs` should have an `ioctl()`-based API (or similar) for
-querying information of the backing file systems/block devices.
-
-**Use-Case:** In systemd in various areas we automatically find the
-block device backing the root file system and other file systems
-(Example: `systemd-gpt-auto-generator` or `bootctl` will try to find
-auxiliary file systems of the OS image by looking in the GPT
-partition table the root file system is located in). While this
-logic is good enough to find the backing block devices of some more
-complex storage such as dm-crypt, dm-verity or btrfs, once
-`overlayfs` is used as backing for the root file system this logic
-does not work anymore. It would be great if there was an API to
-simply query `overlayfs` for the superblock information
-(i.e. `.st_dev`) of the backing layers.
diff --git a/website/content/wishlist/linking-of-o-tmpfile-files-with-replacement.md b/website/content/wishlist/linking-of-o-tmpfile-files-with-replacement.md
deleted file mode 100644
index 5b097a4..0000000
--- a/website/content/wishlist/linking-of-o-tmpfile-files-with-replacement.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-title: "Linking of `O_TMPFILE` files with replacement"
-weight: 50
-status: wishlist
-categories:
-  - filesystems
----
-
-Ability to link an `O_TMPFILE` file into a directory while *replacing* an
-existing file. (Currently there's only the ability to link it in, if the
-file name doesn't exist yet.)
-
-**Use-Case:** there are many programs (e.g. `systemd-hostnamed`
-when updating `/etc/hostname`) that atomically want to update a
-file, so that either the old or the new version is in place, but
-never a partially updated one. The canonical way to do this is by
-creating a temporary file with the new contents, and then renaming
-it to the filename of the file to update, thus atomically replacing
-it. Currently, the temporary file for this must be created with a
-random name, `O_TMPFILE` cannot be used, since for these files
-atomic-replace is not supported, currently.
diff --git a/website/content/wishlist/make-quotas-work-with-user-namespaces.md b/website/content/wishlist/make-quotas-work-with-user-namespaces.md
deleted file mode 100644
index ce3945c..0000000
--- a/website/content/wishlist/make-quotas-work-with-user-namespaces.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "Make quotas work with user namespaces"
-weight: 180
-status: wishlist
-categories:
-  - mounts
-  - namespaces
-  - security
----
-
-The quota codepaths in the kernel are currently broken and inconsistent
-and most interesting operations are guarded behind
-`capable(CAP_SYS_ADMIN)`, i.e., require `CAP_SYS_ADMIN` in the initial
-user namespace. We should rework these codepaths to work with user
-namespaces and then see whether we can make them work with idmapped
-mounts.
-
-**Use-Case:** using quotas correctly in containers.
diff --git a/website/content/wishlist/map-different-uids-gids-to-the-same-uids-gids.md b/website/content/wishlist/map-different-uids-gids-to-the-same-uids-gids.md
deleted file mode 100644
index 3772ee0..0000000
--- a/website/content/wishlist/map-different-uids-gids-to-the-same-uids-gids.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "Map different uids/gids to the same uids/gids?"
-weight: 250
-status: wishlist
-categories:
-  - filesystems
-  - mounts
----
-
-Explore the idea of mapping different uids/gids to the same uids/gids, i.e.
-65534:1000:1 50000:1000:1. This will only work if the mount is read-only as
-the kernel wouldn't know what uid/gid would need to be put to disk
-otherwise (65534? 50000? the first one that is mapped?).
-
-**Use-Case:** Delegate multiple {g,u}ids to the same user. Merging
-ownership similar to how overlayfs merges files. Bindfs
-(https://bindfs.org/docs/bindfs.1.html#sect3) allows this concept too.
diff --git a/website/content/wishlist/namespaced-loop-and-block-devices.md b/website/content/wishlist/namespaced-loop-and-block-devices.md
deleted file mode 100644
index 2d70077..0000000
--- a/website/content/wishlist/namespaced-loop-and-block-devices.md
+++ /dev/null
@@ -1,14 +0,0 @@
----
-title: "Namespaced loop and block devices"
-weight: 270
-status: wishlist
-categories:
-  - block-devices
-  - mounts
-  - namespaces
----
-
-Namespace-able loop and block devices, usable inside user namespaces.
-
-**Use-Case:** Allow mounting images inside nspawn containers, and using
-RootImage= and friends in the systemd user manager.
diff --git a/website/content/wishlist/open-thread-group-leader-via-pidfd-open.md b/website/content/wishlist/open-thread-group-leader-via-pidfd-open.md
deleted file mode 100644
index 011f765..0000000
--- a/website/content/wishlist/open-thread-group-leader-via-pidfd-open.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "Open thread-group leader via `pidfd_open()`"
-weight: 380
-status: wishlist
-categories:
-  - pidfd
----
-
-Extend `pidfd_open()` to allow opening the thread-group leader based on the
-PID of an individual thread. Currently we do support:
-
-1. `pidfd_open(1234, 0)` on a thread-group leader PID
-2. `pidfd_open(1234, PIDFD_THREAD)` on a thread
-
-Add an option to go from individual thread to thread-group leader.
-
-**Use-Case:** Allow for a race free way to go from individual thread
-to thread-group leader pidfd.
diff --git a/website/content/wishlist/path-based-acl-management-in-an-lsm-hook.md b/website/content/wishlist/path-based-acl-management-in-an-lsm-hook.md
deleted file mode 100644
index 0eb0f11..0000000
--- a/website/content/wishlist/path-based-acl-management-in-an-lsm-hook.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "Path-based ACL management in an LSM hook"
-weight: 330
-status: wishlist
-categories:
-  - filesystems
-  - mounts
-  - security
----
-
-The LSM module API should have the ability to do path-based (not
-just inode-based) ACL management.
-
-**Use-Case:** This would be useful in BPF-LSM modules such as
-systemd's `mntfsd` which allows unprivileged file system mounts in
-some cases, and which would like to restrict ACL handling based on
-the superblock involved.
diff --git a/website/content/wishlist/per-cgroup-limit-for-coredump-sizes.md b/website/content/wishlist/per-cgroup-limit-for-coredump-sizes.md
deleted file mode 100644
index f0e8ee3..0000000
--- a/website/content/wishlist/per-cgroup-limit-for-coredump-sizes.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "Per-cgroup limit for coredump sizes"
-weight: 220
-status: wishlist
-categories:
-  - cgroups
-  - processes
----
-
-A per-cgroup knob for coredump sizes. Currently coredump size
-control is strictly per process, and primarily under control of
-the processes themselves. It would be good if we had a per-cgroup
-knob instead, that is under control of the service manager.
-
-**Use-Case:** coredumps can be heavy to generate. For different
-usecases it would be good to be able to opt-in or opt-out
-dynamically from coredumps for specific services, at runtime
-without restarting them.
diff --git a/website/content/wishlist/race-free-creation-and-opening-of-non-file-inodes.md b/website/content/wishlist/race-free-creation-and-opening-of-non-file-inodes.md
deleted file mode 100644
index c076de4..0000000
--- a/website/content/wishlist/race-free-creation-and-opening-of-non-file-inodes.md
+++ /dev/null
@@ -1,31 +0,0 @@
----
-title: "Race-free creation and opening of non-file inodes"
-weight: 230
-status: wishlist
-categories:
-  - filesystems
-  - sockets
----
-
-A way to race-freely create a (non-file) inode and immediately
-open it. For regular files we have open(O_CREAT) for creating a
-new file inode, and returning a pinning fd to it. This is missing
-for other inode types, such as directories, device nodes,
-FIFOs. The lack of such functionality means that when populating a
-directory tree there's always a race involved: the inodes first
-need to be created, and then opened to adjust their
-permissions/ownership/labels/timestamps/acls/xattrs/…, but in the
-time window between the creation and the opening they might be
-replaced by something else. Addressing this race without proper
-APIs is possible (by immediately fstat()ing what was opened, to
-verify that it has the right inode type), but difficult to get
-right. Hence, mkdirat_fd() that creates a directory *and* returns
-an O_DIRECTORY fd to it would be great. As would be mknodeat_fd()
-that creates a device node, FIFO or (dead) socket and returns an
-O_PATH fd to it. And of course symlinkat_fd() that creates a
-symlink and returns an O_PATH fd to it.
-
-**Use-Case:** any program that creates/unpacks not just files, but
-directories, device nodes, fifos, and wants to ensure that they
-safely get the right attributes applied, even if other code might
-simultaneously have access to the same directory tree.
diff --git a/website/content/wishlist/race-free-mounting-of-block-devices.md b/website/content/wishlist/race-free-mounting-of-block-devices.md
deleted file mode 100644
index 25f1116..0000000
--- a/website/content/wishlist/race-free-mounting-of-block-devices.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: "Race-free mounting of block devices"
-weight: 300
-status: wishlist
-categories:
-  - block-devices
-  - filesystems
-  - mounts
----
-
-Introduce a new struct to `fsconfig()` as an alternative to the
-`source` property. The struct contains at least a pointer to a path,
-possibly a device minor and major, and a diskseq number. The VFS can
-expose a helper that filesystems can call and use the diskseq number
-to verify that the block device they are intending to mount is indeed
-the one they want to mount.
-
-**Use-Case:** Race-free mounting of block devices.
diff --git a/website/content/wishlist/read-only-propagation-of-mounts.md b/website/content/wishlist/read-only-propagation-of-mounts.md
deleted file mode 100644
index 255e315..0000000
--- a/website/content/wishlist/read-only-propagation-of-mounts.md
+++ /dev/null
@@ -1,29 +0,0 @@
----
-title: "Read-only propagation of mounts"
-weight: 90
-status: wishlist
-categories:
-  - mounts
-  - namespaces
----
-
-A way to mark mounts that receive mount propagation events from
-elsewhere so that these propagated mounts are established
-read-only implicitly. Right now, if a mount receives a mount
-propagation event it will have the exact same `MS_RDONLY`,
-`MS_NODEV`, … flags as it has where it originated. It would be
-very useful if an `MS_RDONLY` could be ORed into the mount flags
-automatically whenever propagated elsewhere.
-
-**Use-Case:** various mount namespace based sandboxes
-(e.g. `systemd`'s `ProtectSystem=` option) mark large parts of the
-host file hierarchy read-only via mounting it
-`MS_RDONLY|MS_BIND|MS_REMOUNT`, but generally intend to leave the
-file hierarchy besides that the way it is, and that includes they
-typically still want to be able to receive mount events to
-directories such as `/mnt/` and `/media/` in these sandboxed
-environments. Right now, any such propagation then happens in
-writable mode, even if the file hierarchy otherwise is almost
-entirely read-only. To close this gap it would be great if such
-propagated mounts could implicitly gain `MS_RDONLY` as they are
-propagated.
diff --git a/website/content/wishlist/reasonable-eof-on-sock-seqpacket.md b/website/content/wishlist/reasonable-eof-on-sock-seqpacket.md
deleted file mode 100644
index bb35f21..0000000
--- a/website/content/wishlist/reasonable-eof-on-sock-seqpacket.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "Reasonable EOF on SOCK_SEQPACKET"
-weight: 400
-status: wishlist
-categories:
-  - sockets
----
-
-Zero size datagrams cannot be distinguished from EOF on
-`SOCK_SEQPACKET`. Both will cause `recvmsg()` to return zero.
-
-Idea how to improve things: maybe define a new MSG_XYZ flag for this,
-which causes either of the two cases to result in some recognizable error
-code returned rather than a 0.
-
-**Use-Case:** Any code that wants to use `SOCK_SEQPACKET` and cannot
-afford to disallow zero sized datagrams from their protocol.
diff --git a/website/content/wishlist/reasonable-handling-of-selinux-dropping-scm-rights-fds.md b/website/content/wishlist/reasonable-handling-of-selinux-dropping-scm-rights-fds.md
deleted file mode 100644
index 0b224c8..0000000
--- a/website/content/wishlist/reasonable-handling-of-selinux-dropping-scm-rights-fds.md
+++ /dev/null
@@ -1,19 +0,0 @@
----
-title: "Reasonable Handling of SELinux dropping SCM_RIGHTS fds"
-weight: 410
-status: wishlist
-categories:
-  - sockets
----
-
-Currently, if SELinux refuses to let some file descriptor through, it
-will just drop them from the `SCM_RIGHTS` array. That's a terrible
-idea, since applications rely on the precise arrangement of the array
-to know which fd is which. By dropping entries silently, these apps
-will all break.
-
-Idea how to improve things: leave the elements in the array in place,
-but return a marker instead (i.e. negative integer, maybe `-EPERM`) that
-tells userspace that there was an fd, but it was not allowed through.
-
-**Use-Case:** Any code that wants to use `SCM_RIGHTS` properly.
diff --git a/website/content/wishlist/security-hook-for-mount-setattr.md b/website/content/wishlist/security-hook-for-mount-setattr.md
deleted file mode 100644
index 2d20523..0000000
--- a/website/content/wishlist/security-hook-for-mount-setattr.md
+++ /dev/null
@@ -1,13 +0,0 @@
----
-title: "Security hook for `mount_setattr()`"
-weight: 210
-status: wishlist
-categories:
-  - mounts
-  - security
----
-
-(kAPI) Add security hook to `mount_setattr()`.
-
-**Use-Case:** Allow LSMs to make decisions about what mount properties to
-allow and what to deny.
diff --git a/website/content/wishlist/specification-of-a-keyring-for-dm-verity-volume-verification.md b/website/content/wishlist/specification-of-a-keyring-for-dm-verity-volume-verification.md
deleted file mode 100644
index accdeaf..0000000
--- a/website/content/wishlist/specification-of-a-keyring-for-dm-verity-volume-verification.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "Specification of a keyring for dm-verity volume verification"
-weight: 320
-status: wishlist
-categories:
-  - block-devices
----
-
-When activating a dm-verity volume allow specifying a keyring to
-validate the root hash signature against.
-
-**Use-Case:** In systemd, we'd like to authenticate Portable Service
-images, system extension images, configuration images, container
-images with different keys, as they typically originate from
-different sources and it should not be possible to generate a
-system extension with a key pair that is supposed to be good for
-container images only.
diff --git a/website/content/wishlist/unlinking-via-two-file-descriptors.md b/website/content/wishlist/unlinking-via-two-file-descriptors.md
deleted file mode 100644
index 34f3845..0000000
--- a/website/content/wishlist/unlinking-via-two-file-descriptors.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-title: "Unlinking via two file descriptors"
-weight: 70
-status: wishlist
-categories:
-  - filesystems
-  - processes
----
-
-`unlinkat3(dir_fd, name, inode_fd)`: taking one file descriptor
-for the directory to remove a file in, and another one referring
-to the inode of the filename to remove. This call should only
-succeed if the specified filename still refers to the specified
-inode.
-
-**Use-Case:** code that operates on a well-known path that might be
-shared by multiple programs that jointly manage it might want to
-safely remove a filename under the guarantee it still refers to
-the expected inode. As a specific example, consider lock files,
-that should be cleaned up only if they still refer to the assumed
-owner's instance, but leave the file in place if another process
-already took over the filename.
diff --git a/website/content/wishlist/upgrade-masks-in-openat2.md b/website/content/wishlist/upgrade-masks-in-openat2.md
deleted file mode 100644
index c024a02..0000000
--- a/website/content/wishlist/upgrade-masks-in-openat2.md
+++ /dev/null
@@ -1,13 +0,0 @@
----
-title: "Upgrade masks in `openat2()`"
-weight: 170
-status: wishlist
-categories:
-  - security
----
-
-Add upgrade masks to `openat2()`. Extend `struct open_how` to allow
-restricting re-opening of file descriptors.
-
-**Use-Case:** block services or containers from re-opening/upgrading an
-`O_PATH` file descriptor through e.g. `/proc/<pid>/fd/<nr>` as `O_WRONLY`.
diff --git a/website/content/wishlist/useful-handling-of-lsm-denials-on-scm-rights.md b/website/content/wishlist/useful-handling-of-lsm-denials-on-scm-rights.md
deleted file mode 100644
index 3019c3b..0000000
--- a/website/content/wishlist/useful-handling-of-lsm-denials-on-scm-rights.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-title: "Useful handling of LSM denials on SCM_RIGHTS"
-weight: 390
-status: wishlist
-categories:
-  - security
-  - sockets
----
-
-Right now if some LSM such as SELinux denies an `AF_UNIX` socket peer
-to receive an `SCM_RIGHTS` fd the `SCM_RIGHTS` fd array will be cut
-short at that point, and `MSG_CTRUNC` is set on return of
-`recvmsg()`. This is highly problematic behaviour, because it leaves
-the receiver wondering what happened. As per man page `MSG_CTRUNC` is
-supposed to indicate that the control buffer was sized too short, but
-suddenly a permission error might result in the exact same flag being
-set. Moreover, the receiver has no chance to determine how many fds
-got originally sent and how many were suppressed.
-
-Ideas how to improve things:
-
-1. Maybe introduce a new flag `MSG_RIGHTS_DENIAL` or so which is set
-   on `recvmsg()` return, which tells us that fds were dropped from
-   the `SCM_RIGHTS` array because of an LSM error. This new flag could
-   be set in addition to `MSG_CTRUNC`, for compatibility.
-
-2. Maybe, define a new flag `MSG_RIGHTS_FILTER` or so which when
-   passed to `recvmsg()` will ensure that the `SCM_RIGHTS` fd array is
-   always passed through in its full, original size. Entries for which
-   an LSM says no are suppressed, and replaced by a special value, for
-   example `-EPERM`.
-
-3. It would be good if the relevant man page would at least document
-   this pitfall, even if it right now cannot reasonably be handled.
-
-Ideally both of the first two ideas would be implemented, but of course,
-strictly speaking the 2nd idea makes the 1st idea half-way redundant.
-
-**Use-Case:** Any code that uses `SCM_RIGHTS` generically (D-Bus and
-so on) needs this, so that it can reasonably handle SELinux AVC errors
-on received messages.
diff --git a/website/content/wishlist/xattrs-for-pidfd.md b/website/content/wishlist/xattrs-for-pidfd.md
deleted file mode 100644
index 513f876..0000000
--- a/website/content/wishlist/xattrs-for-pidfd.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: "xattrs for pidfd"
-weight: 10
-status: wishlist
-categories:
-  - filesystems
-  - pidfd
----
-
-Since pidfds have been moved to a separate pidfs filesystem it is easy
-to add support for xattrs on pidfds. That could be valuable to store
-meta information along the pidfd. Storing an xattr should probably make
-the pidfd automatically persistent, i.e., the reference for the dentry
-is only put once the task is reaped.
-
-**Use-Case:** Store meta information alongside pidfds.
diff --git a/website/layouts/partials/docs/inject/content-before.html b/website/layouts/partials/docs/inject/content-before.html
deleted file mode 100644
index 2825159..0000000
--- a/website/layouts/partials/docs/inject/content-before.html
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ with .Params.status }}
-  {{ $label := . }}
-  {{ if eq . "in-progress" }}{{ $label = "In Progress" }}{{ end }}
-  {{ if eq . "wishlist" }}{{ $label = "Wishlist" }}{{ end }}
-  {{ if eq . "completed" }}{{ $label = "Completed" }}{{ end }}
-  <span class="status-badge status-{{ . }}">{{ $label }}</span>
-{{ end }}
-
-{{ with .Params.categories }}
-  <div class="category-pills">
-    {{ range . }}
-      <a class="category-pill" href='{{ "/categories/" | relURL }}{{ . | urlize }}/'>{{ . }}</a>
-    {{ end }}
-  </div>
-{{ end }}
-
-{{ with .Params.commit }}
-  <p><span class="commit-ref">{{ . }}</span></p>
-{{ end }}
diff --git a/website/layouts/partials/docs/links/edit.html b/website/layouts/partials/docs/links/edit.html
index 8068105..02e8501 100644
--- a/website/layouts/partials/docs/links/edit.html
+++ b/website/layouts/partials/docs/links/edit.html
@@ -1,5 +1,5 @@
 {{- return (partial "docs/text/template" (dict "Template" .Site.Params.BookEditLink "Context" (dict
 "Site" .Site
 "Page" .Page
-"Path" .Page.File.Path
+"Path" (strings.TrimPrefix hugo.WorkingDir ( replace .Page.File.Path "_index.md" .Site.Params.BookIndexPage))
 )) | urls.JoinPath) -}}