Rebuild /dev/kmem to enumerate all Linux kernel modules (including hidden ones)

Doesn't Linux already have lsmod to enumerate all kernel modules? Isn't procfs good enough? Why bother enumerating from /dev/kmem?

In fact, Linux is a latecomer. In the original UNIX era, tools that enumerate processes, such as ps, worked by scanning /dev/kmem; everything is a file, after all. Later, the Linux kernel expanded procfs inappropriately and put all kinds of unrelated things into it, such as modules, filesystems and vmallocinfo, which are obviously not processes. Throwing all of them in there is not really appropriate, but because this is the Linux kernel, everything it does is right!

Of course, Linux also retains /dev/mem and /dev/kmem, two extremely special and fun files:

  • /dev/mem: maps all of the system's physical memory.
  • /dev/kmem: maps all of the system's kernel virtual memory.

Later, because /dev/kmem exposes far too much and is a security risk, most kernels disable this character device and keep only /dev/mem, and even that is restricted:

# CONFIG_DEVKMEM is not set
CONFIG_STRICT_DEVMEM=y

This article shows how to enumerate all kernel modules by scanning /dev/kmem.

What? Isn't CONFIG_DEVKMEM disabled? No problem: I simply re-implement the device myself as a module. In addition, my re-implementation supports mapping the vmalloc space.

The code is as follows:

// kmem.c
#include <linux/capability.h>
#include <linux/cdev.h>
#include <linux/fs.h>
#include <linux/kallsyms.h>
#include <linux/mm.h>
#include <linux/module.h>

/*
 * Kernel helpers that this module calls through pointers. They are filled in
 * by devkmem_init() via kallsyms_lookup_name() and stay NULL if the lookup
 * fails. The pointers are private to this file, hence static.
 */
// Resolved from the symbol "phys_mem_access_prot".
static pgprot_t (*_phys_mem_access_prot)(struct file *, unsigned long, unsigned long, pgprot_t);
// Resolved from the symbol "slow_virt_to_phys".
static phys_addr_t (*_slow_virt_to_phys)(void *);
// Resolved from the symbol "lookup_address".
static pte_t *(*_lookup_address)(unsigned long, unsigned int *);

/*
 * VMA operations attached to every mapping created by mmap_kmem().
 * Only the .access callback is provided; it is the kernel's
 * generic_access_phys().
 */
static const struct vm_operations_struct mmap_mem_ops = {
	.access	= generic_access_phys,
};

static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
{
	unsigned long pfn;
	pte_t *pte;
	unsigned int level = 0;
	size_t size;

	// This is what I added. Because vmalloc space is included, it is necessary to prevent crash caused by the virtual address value of unmapped page.
	pte = _lookup_address((u64)vma->vm_pgoff << PAGE_SHIFT, &level);
	if (!pte || !pte_present(*pte))
		return -EIO;

	// pfn is obtained by a general method instead of only considering linear mapping.
	pfn = _slow_virt_to_phys((void *)(vma->vm_pgoff << PAGE_SHIFT)) >> PAGE_SHIFT;

	if (!pfn_valid(pfn))
		return -EIO;

	vma->vm_pgoff = pfn;

	size = vma->vm_end - vma->vm_start;
	vma->vm_page_prot = _phys_mem_access_prot(file, vma->vm_pgoff,
						 size,
						 vma->vm_page_prot);
	vma->vm_ops = &mmap_mem_ops;
	if (remap_pfn_range(vma,
			    vma->vm_start,
			    vma->vm_pgoff,
			    size,
			    vma->vm_page_prot)) {
		return -EAGAIN;
	}

	return 0;
}

static const struct file_operations kmem_fops = {
	.mmap		= mmap_kmem,
};

// Device number allocated in devkmem_init() and released in devkmem_exit().
static dev_t dev;
// Character device backing the kmem node.
static struct cdev kmem_cdev;

/*
 * Module init: resolve the kernel helpers used by mmap_kmem(), allocate a
 * character device number and register the cdev.
 *
 * Returns 0 on success. On failure returns -ENOENT when a helper symbol
 * cannot be resolved, otherwise the error code reported by
 * alloc_chrdev_region() or cdev_add().
 */
static int __init devkmem_init(void)
{
	int ret;

	_phys_mem_access_prot = (void *)kallsyms_lookup_name("phys_mem_access_prot");
	_slow_virt_to_phys = (void *)kallsyms_lookup_name("slow_virt_to_phys");
	_lookup_address = (void *)kallsyms_lookup_name("lookup_address");

	// kallsyms_lookup_name() yields 0 for an unknown symbol; refuse to load
	// rather than leave a NULL pointer for mmap_kmem() to call.
	if (!_phys_mem_access_prot || !_slow_virt_to_phys || !_lookup_address) {
		printk("symbol lookup failed\n");
		return -ENOENT;
	}

	ret = alloc_chrdev_region(&dev, 0, 1, "test_dev");
	if (ret < 0) {
		printk("alloc failed\n");
		return ret;
	}
	// The major number printed here is the one to pass to mknod.
	printk("major=%d minor=%d \n",MAJOR(dev), MINOR(dev));

	cdev_init(&kmem_cdev, &kmem_fops);
	kmem_cdev.owner = THIS_MODULE;

	ret = cdev_add(&kmem_cdev, dev, 1);
	if (ret < 0) {
		printk("add failed\n");
		goto out;
	}
	return 0;

out:
	// Undo the device number allocation made above.
	unregister_chrdev_region(dev,1);
	return ret;
}

/*
 * Module exit: tear down in the reverse order of devkmem_init(): remove
 * the cdev first, then release the device number it was registered under.
 * Only referenced through module_exit() in this file, hence static.
 */
static void __exit devkmem_exit(void)
{
	cdev_del(&kmem_cdev);
	unregister_chrdev_region(dev, 1);
}

// Register devkmem_init()/devkmem_exit() as the module's load and unload
// entry points and declare the module license as GPL.
module_init(devkmem_init);
module_exit(devkmem_exit);
MODULE_LICENSE("GPL");

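OK, compile it, load it, and create the character device (use the major number that the module prints to the kernel log when it loads; 248 is what it was on my machine):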

insmod ./kmem.ko
mknod /dev/kmem c 248 0

Then, the following script shows how to enumerate all the modules. Because it maps and parses one address at a time, and because my bash skills are poor and I know neither Python nor Go, the script is very inefficient and runs very slowly. If all the memory from 0xffffffffa0000000 to 0xffffffffff000000 were mapped in one go, it would be much faster. Slow as it is, it is absolutely thorough. Let's see:

#!/bin/bash

# mlist.sh
#
# Enumerate kernel modules by scanning every area of /proc/vmallocinfo whose
# line contains 0xffffffffa (the module mapping space) for struct module
# instances.
#
# Relies on ./a.out, a helper that is not part of this script. From the way it
# is used here it is expected to print, in upper-case hex without a 0x prefix,
# the 8-byte word stored at the kernel virtual address given as its argument
# (hex, no prefix), and to exit non-zero when that address cannot be read.
#
# The hex offsets 138, 78 and 18 used below are struct module field offsets on
# the kernel this was written for -- presumably module_core, the kobject's
# ktype and name, with state at offset 0. Verify them for your kernel.

# Address of module_ktype, upper-cased so it compares equal to ./a.out output.
# Match the symbol name exactly so that similarly named symbols are ignored.
moktype=$(awk '$3 == "module_ktype" {print toupper($1); exit}' /proc/kallsyms)
if [ -z "$moktype" ]; then
	echo "module_ktype not found in /proc/kallsyms" >&2
	exit 1
fi

# hex_off BASE OP OFFSET
# Print BASE OP OFFSET (OP is + or -) in upper-case hex. All values are hex
# without a 0x prefix. Shell arithmetic wraps at 64 bits, which is exactly
# what is needed for kernel addresses, and avoids spawning bc for every step.
hex_off() {
	printf '%X' $(( 0x$1 $2 0x$3 ))
}

# report_module ADDR
# ADDR is the address of a word that points back at the start of its own area
# (the self-referential feature of module core). Treat ADDR - 138 as a
# candidate struct module and print its name if the remaining checks pass.
report_module() {
	local mod state type name

	mod=$(hex_off "$1" - 138)

	# The state word must read as 0.
	state=$(./a.out "$mod") || return
	[ "$state" == '0' ] || return

	# The ktype of the module's kobject must be module_ktype.
	type=$(./a.out "$(hex_off "$mod" + 78)") || return
	[ "$type" == "$moktype" ] || return

	# Only the first eight characters of the name are read.
	name=$(./a.out "$(hex_off "$mod" + 18)") || return
	# Turn each pair of hex digits into the byte it encodes, then reverse the
	# string: the bytes come out last-character-first (presumably because the
	# helper prints the word as one little-endian integer).
	name=$(echo -n "$name"|sed 's/\([0-9A-F]\{2\}\)/\\\\\\x\1/gI' | xargs printf)
	name=$(echo "$name"|rev 2>/dev/null)
	if [ $? -eq 0 ]; then
		echo "name-- $name"
	fi
}

# Each range is printed as START-END in upper-case hex without 0x prefixes.
for range in $(awk '/0xffffffffa/ {r = toupper($1); gsub(/0X/, "", r); print r}' /proc/vmallocinfo)
do
	base=${range%%-*}
	next=$base
	# Walk the area one 8-byte word at a time until ./a.out can no longer
	# read the address.
	while val=$(./a.out "$next"); do
		if [ "$val" == "$base" ]; then
			report_module "$next"
		fi
		next=$(hex_off "$next" + 8)
	done
done

The results are as follows:

[root@localhost test]# ./mlist.sh
name-- dm_mod
name-- dm_regio
name-- serio_ra
name-- dm_log
name-- dm_mirro
name-- libata
name-- ahci
name-- ata_gene
name-- crct10di
name-- e1000
name-- pata_acp
name-- cdrom
name-- sr_mod
name-- crc_t10d
name-- sd_mod
name-- ablk_hel
name-- libcrc32
name-- ip_table
name-- video
name-- i2c_piix
name-- parport
name-- cryptd
name-- parport_
name-- ata_piix
name-- kmem
name-- i2c_core
...

Here is a brief explanation.

We know that the address space into which modules are mapped runs from 0xffffffffa0000000 to 0xffffffffff000000, so that is where we need to look for them. As long as a module is loaded in the normal way, through the init_module system call, it must be in this space, so we only need to scan the memory of this space and match two key features:

  • The self referential feature of module core.
  • ktype feature of kobject of module.

OK, now let's look at how to find hidden modules in this address space, that is, modules that have been unlinked from the module list:

  • Scan the gaps!!

Come on:

#!/bin/bash

# Look for hidden modules by scanning the gaps between the areas that
# /proc/vmallocinfo lists in the module mapping space (lines containing
# 0xffffffffa).
#
# Relies on ./a.out, a helper that is not part of this script. From the way it
# is used here it is expected to print, in upper-case hex without a 0x prefix,
# the 8-byte word stored at the kernel virtual address given as its argument
# (hex, no prefix), and to exit non-zero when that address cannot be read.
#
# The hex offsets 138, 78 and 18 used below are struct module field offsets on
# the kernel this was written for -- presumably module_core, the kobject's
# ktype and name, with state at offset 0. Verify them for your kernel.

# Address of module_ktype, upper-cased so it compares equal to ./a.out output.
# Match the symbol name exactly so that similarly named symbols are ignored.
moktype=$(awk '$3 == "module_ktype" {print toupper($1); exit}' /proc/kallsyms)
if [ -z "$moktype" ]; then
	echo "module_ktype not found in /proc/kallsyms" >&2
	exit 1
fi

# hex_off BASE OP OFFSET
# Print BASE OP OFFSET (OP is + or -) in upper-case hex. All values are hex
# without a 0x prefix. Shell arithmetic wraps at 64 bits, which is exactly
# what is needed for kernel addresses, and avoids spawning bc for every step.
hex_off() {
	printf '%X' $(( 0x$1 $2 0x$3 ))
}

# report_module ADDR
# ADDR is the address of a word that points back at the start of the gap being
# scanned (the self-referential feature of module core). Treat ADDR - 138 as a
# candidate struct module and print its name if the remaining checks pass.
report_module() {
	local mod state type name

	mod=$(hex_off "$1" - 138)

	# The state word must read as 0.
	state=$(./a.out "$mod") || return
	[ "$state" == '0' ] || return

	# The ktype of the module's kobject must be module_ktype.
	type=$(./a.out "$(hex_off "$mod" + 78)") || return
	[ "$type" == "$moktype" ] || return

	# Only the first eight characters of the name are read.
	name=$(./a.out "$(hex_off "$mod" + 18)") || return
	# Turn each pair of hex digits into the byte it encodes, then reverse the
	# string: the bytes come out last-character-first (presumably because the
	# helper prints the word as one little-endian integer).
	name=$(echo -n "$name"|sed 's/\([0-9A-F]\{2\}\)/\\\\\\x\1/gI' | xargs printf)
	name=$(echo "$name"|rev 2>/dev/null)
	if [ $? -eq 0 ]; then
		echo "name-- $name"
	fi
}

# End of the previously listed area. Seeded with the start of the module
# mapping space so that the first area needs no special case and a gap in
# front of it is scanned as well.
prev_end='FFFFFFFFA0000000'

# Each range is printed as START-END in upper-case hex without 0x prefixes.
for range in $(awk '/0xffffffffa/ {r = toupper($1); gsub(/0X/, "", r); print r}' /proc/vmallocinfo)
do
	start=${range%%-*}
	base=$prev_end
	prev_end=${range#*-}

	# Back-to-back areas leave no gap to scan.
	if [ "$start" == "$base" ]; then
		continue
	fi

	# Walk the gap one 8-byte word at a time. Stop when ./a.out can no longer
	# read the address, or when the next listed area is reached.
	next=$base
	while [ "$next" != "$start" ] && val=$(./a.out "$next"); do
		if [ "$val" == "$base" ]; then
			report_module "$next"
		fi
		next=$(hex_off "$next" + 8)
	done
done

Try it. As long as the module was loaded with the insmod command and then removed from the module list, the hidden module is easy to find; it just takes a long time.

It takes a long time because my script is very inefficient. In fact, if every page were mapped only once, it would be much better. But because I can't program, this will have to do for now.

As mentioned earlier, any module loaded through the insmod command, i.e. the init_module system call, can be found. But what if the module is not loaded in this normal way?

In fact, think about how many ways there are into the kernel, especially ways of getting code into the kernel:

  • init_module
  • ptrace
  • ftrace
  • eBPF
  • ...

There are really not many. init_module is the most commonly used and the easiest. If you don't use init_module, what else can you use?

Wenzhou leather shoes in Zhejiang Province are wet, and they will not be fat if it rains and floods.

Tags: Linux Unix Python

Posted on Sat, 16 May 2020 23:53:43 -0400 by sanderphp