kernel源码分析 do_msgsnd read_msg
笔者分析的源码是v 5.11.22
链接:msg.c - ipc/msg.c - Linux source code v5.11.22 - Bootlin
do_msgsnd
static long do_msgsnd(int msqid, long mtype, void __user *mtext,size_t msgsz, int msgflg)
{struct msg_queue *msq;struct msg_msg *msg;int err;struct ipc_namespace *ns;DEFINE_WAKE_Q(wake_q);ns = current->nsproxy->ipc_ns;if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)return -EINVAL;if (mtype < 1)return -EINVAL;msg = load_msg(mtext, msgsz);if (IS_ERR(msg))return PTR_ERR(msg);msg->m_type = mtype;msg->m_ts = msgsz;rcu_read_lock();msq = msq_obtain_object_check(ns, msqid);if (IS_ERR(msq)) {err = PTR_ERR(msq);goto out_unlock1;}ipc_lock_object(&msq->q_perm);for (;;) {struct msg_sender s;err = -EACCES;if (ipcperms(ns, &msq->q_perm, S_IWUGO))goto out_unlock0;/* raced with RMID? */if (!ipc_valid_object(&msq->q_perm)) {err = -EIDRM;goto out_unlock0;}err = security_msg_queue_msgsnd(&msq->q_perm, msg, msgflg);if (err)goto out_unlock0;if (msg_fits_inqueue(msq, msgsz))break;/* queue full, wait: */if (msgflg & IPC_NOWAIT) {err = -EAGAIN;goto out_unlock0;}/* enqueue the sender and prepare to block */ss_add(msq, &s, msgsz);if (!ipc_rcu_getref(&msq->q_perm)) {err = -EIDRM;goto out_unlock0;}ipc_unlock_object(&msq->q_perm);rcu_read_unlock();schedule();rcu_read_lock();ipc_lock_object(&msq->q_perm);ipc_rcu_putref(&msq->q_perm, msg_rcu_free);/* raced with RMID? */if (!ipc_valid_object(&msq->q_perm)) {err = -EIDRM;goto out_unlock0;}ss_del(&s);if (signal_pending(current)) {err = -ERESTARTNOHAND;goto out_unlock0;}}ipc_update_pid(&msq->q_lspid, task_tgid(current));msq->q_stime = ktime_get_real_seconds();if (!pipelined_send(msq, msg, &wake_q)) {/* no one is waiting for this message, enqueue it */list_add_tail(&msg->m_list, &msq->q_messages);msq->q_cbytes += msgsz;msq->q_qnum++;atomic_add(msgsz, &ns->msg_bytes);atomic_inc(&ns->msg_hdrs);}err = 0;msg = NULL;out_unlock0:ipc_unlock_object(&msq->q_perm);wake_up_q(&wake_q);
out_unlock1:rcu_read_unlock();if (msg != NULL)free_msg(msg);return err;
}
首先进行判断
if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)return -EINVAL;
if (mtype < 1)return -EINVAL;
合法的msqid都是大于等于0的,这个不用管,需要注意的是mtype<1是invalid argument,所以在创建msg_msg时,要确保mtype>=1。
接下来
msg = load_msg(mtext, msgsz);
struct msg_msg *load_msg(const void __user *src, size_t len)
{struct msg_msg *msg;struct msg_msgseg *seg;int err = -EFAULT;size_t alen;msg = alloc_msg(len);if (msg == NULL)return ERR_PTR(-ENOMEM);alen = min(len, DATALEN_MSG);if (copy_from_user(msg + 1, src, alen))goto out_err;for (seg = msg->next; seg != NULL; seg = seg->next) {len -= alen;src = (char __user *)src + alen;alen = min(len, DATALEN_SEG);if (copy_from_user(seg + 1, src, alen))goto out_err;}err = security_msg_msg_alloc(msg);if (err)goto out_err;return msg;out_err:free_msg(msg);return ERR_PTR(err);
}
static struct msg_msg *alloc_msg(size_t len)
{struct msg_msg *msg;struct msg_msgseg **pseg;size_t alen;alen = min(len, DATALEN_MSG);msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT);if (msg == NULL)return NULL;msg->next = NULL;msg->security = NULL;len -= alen;pseg = &msg->next;while (len > 0) {struct msg_msgseg *seg;cond_resched();alen = min(len, DATALEN_SEG);seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT);if (seg == NULL)goto out_err;*pseg = seg;seg->next = NULL;pseg = &seg->next;len -= alen;}return msg;out_err:free_msg(msg);return NULL;
}
struct msg_msgseg {struct msg_msgseg *next;/* the next part of the message follows immediately */
};#define DATALEN_MSG ((size_t)PAGE_SIZE-sizeof(struct msg_msg))
#define DATALEN_SEG ((size_t)PAGE_SIZE-sizeof(struct msg_msgseg))
总的来说,就是分配我们传入的bufsz大小的msg_msg,然后将mtext给copy过去,注意,我们这里的msg->security==NULL。
struct msg_msg {struct list_head m_list;long m_type;size_t m_ts; /* message text size */struct msg_msgseg *next;void *security;/* the actual message follows immediately */
};
struct msg_queue {struct kern_ipc_perm q_perm;time64_t q_stime; /* last msgsnd time */time64_t q_rtime; /* last msgrcv time */time64_t q_ctime; /* last change time */unsigned long q_cbytes; /* current number of bytes on queue */unsigned long q_qnum; /* number of messages in queue */unsigned long q_qbytes; /* max number of bytes on queue */struct pid *q_lspid; /* pid of last msgsnd */struct pid *q_lrpid; /* last receive pid */struct list_head q_messages;struct list_head q_receivers;struct list_head q_senders;
} __randomize_layout;
list_add_tail(&msg->m_list, &msq->q_messages);
具体将msg_msg链入的一部是这个,list_add_tail
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{__list_add(new, head->prev, head);
}
static inline void __list_add(struct list_head *new,struct list_head *prev,struct list_head *next)
{next->prev = new;new->next = next;new->prev = prev;prev->next = new;
}
我们也可以知道msgq中的一些字段的含义
msq->q_cbytes += msgsz;
msq->q_qnum++;
q_cbtytes字段就是这个queue中存储的总字节数
q_qnum就是拥有的msg_msg的数量。
从kernel pwn的角度来说,能用的就是load_msg那里,先用alloc_msg构建好msg_msg,所有next指针都分配好了,然后再copy,可以用userfault卡在copy_from_user,然后修改next指针实现地址任意写。
接下来我们分析msg_read
msg_read
很多细节前面的msgrcv分析copy都讲了,这里主要讲不一样的。
static inline int convert_mode(long *msgtyp, int msgflg)
{if (msgflg & MSG_COPY)return SEARCH_NUMBER;/** find message of correct type.* msgtyp = 0 => get first.* msgtyp > 0 => get first message of matching type.* msgtyp < 0 => get message with least type must be < abs(msgtype).*/if (*msgtyp == 0)return SEARCH_ANY;if (*msgtyp < 0) {if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */*msgtyp = LONG_MAX;else*msgtyp = -*msgtyp;return SEARCH_LESSEQUAL;}if (msgflg & MSG_EXCEPT)return SEARCH_NOTEQUAL;return SEARCH_EQUAL;
}
我们的msgtpy是0,所以mode就是SEARCH_EQUAL
static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
{struct msg_msg *msg, *found = NULL;long count = 0;list_for_each_entry(msg, &msq->q_messages, m_list) {if (testmsg(msg, *msgtyp, mode) &&!security_msg_queue_msgrcv(&msq->q_perm, msg, current,*msgtyp, mode)) {if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {*msgtyp = msg->m_type - 1;found = msg;} else if (mode == SEARCH_NUMBER) {if (*msgtyp == count)return msg;} elsereturn msg;count++;}}return found ?: ERR_PTR(-EAGAIN);
}
static int testmsg(struct msg_msg *msg, long type, int mode)
{switch (mode) {case SEARCH_ANY:case SEARCH_NUMBER:return 1;case SEARCH_LESSEQUAL:if (msg->m_type <= type)return 1;break;case SEARCH_EQUAL:if (msg->m_type == type)return 1;break;case SEARCH_NOTEQUAL:if (msg->m_type != type)return 1;break;}return 0;
}
可以看到,最后会找到与我们传入的type相等的msg_msg
if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {msg = ERR_PTR(-E2BIG);goto out_unlock0;}
我们传入的msgflg是0,所以后面是真,也就是说,如果我们传入的bufsz<msg->m_ts,是没用的,所以我们传入的bufsz必须大于或等于msg->m_ts。
接下来会进入这个函数。
list_del(&msg->m_list);
static inline void list_del(struct list_head *entry)
{__list_del_entry(entry);entry->next = LIST_POISON1;entry->prev = LIST_POISON2;
}
static inline void __list_del_entry(struct list_head *entry)
{if (!__list_del_entry_valid(entry))return;__list_del(entry->prev, entry->next);
}
static inline void __list_del(struct list_head * prev, struct list_head * next)
{next->prev = prev;WRITE_ONCE(prev->next, next);
}
static inline bool __list_del_entry_valid(struct list_head *entry)
{return true;
}
其实就是脱链操作,从kernel pwn的角度,_list_del_entry_valid是直接返回true,这意味着,在这个版本,我们只需随意把这两个指针覆盖为有效可写地址就行了,或许可以构造unlink攻击?
话说其他版本会不会在此增加一些验证???
其他和前篇的msgrcv相似,就不说了。