一、物理地址空间
内存映射即在进程的虚拟地址空间中创建一个映射,分为两种:
- 文件映射:有文件支持的内存映射,把文件的一个区间映射到进程的虚拟地址空间,数据源是存储设备上的文件。
- 匿名映射:没有文件支持的内存映射,把物理内存映射到进程的虚拟地址空间,没有数据源。
【原理】:创建内存映射时,在进程的用户虚拟地址空间中分配一个虚拟内存区域。内核采用延迟分配物理内存的策略,在进程第一次访问虚拟页的时候,产生缺页异常。==如果是文件映射,那么分配物理页,把文件指定区间的数据读到物理页中,然后在页表中把虚拟页映射到物理页。如果是匿名映射,就分配物理页,然后在页表中把虚拟页映射到物理页。==(1)两个进程可以使用共享的文件映射实现共享内存。匿名映射通常是私有映射,共享的匿名映射只可能出现在父进程和子进程之间。在进程的虚拟地址空间中,代码段和数据段是私有的文件映射,未初始化数据段、堆栈是私有的匿名映射。(2)修改过的脏页面不会立即更新到文件中,可以调用msync来强制同步写入文件。
flowchart LR
task_struct --> mm_struct --> vm_area_struct
/*
 * Describes one virtual memory area (VMA): the address range
 * [vm_start, vm_end) inside the address space vm_mm, together with its
 * protection/flags, its optional backing file and the operations table
 * (vm_ops) used to manage it.
 */
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */
// These two members hold the start address of this virtual memory area and
// the address of the first byte after its end.
unsigned long vm_start; /* Our start address within vm_mm. */
unsigned long vm_end; /* The first byte after our end address within vm_mm. */
/* linked list of VM areas per task, sorted by address */
struct vm_area_struct *vm_next, *vm_prev;
// Keeping the VMAs only in a linked list would make lookups slow. To solve
// this a red-black tree is used: each process's mm_struct holds a red-black
// tree and every VMA is inserted into it as a node, which speeds up searches.
struct rb_node vm_rb;
/*
* Largest free memory gap in bytes to the left of this VMA.
* Either between this VMA and vma->vm_prev, or between one of the
* VMAs below us in the VMA rbtree and its ->vm_prev. This helps
* get_unmapped_area find a free area of the right size.
*/
unsigned long rb_subtree_gap;
/* Second cache line starts here. */
struct mm_struct *vm_mm; /* The address space we belong to. */
pgprot_t vm_page_prot; /* Access permissions of this VMA. */
unsigned long vm_flags; /* Flags, see mm.h. */
/*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
*/
struct {
struct rb_node rb;
unsigned long rb_subtree_last;
} shared;
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
* or brk vma (with NULL file) can only be in an anon_vma list.
*/
struct list_head anon_vma_chain; /* Serialized by mmap_sem &
* page_table_lock */
struct anon_vma *anon_vma; /* Serialized by page_table_lock */
/* Function pointers to deal with this struct. */
const struct vm_operations_struct *vm_ops;
/* Information about our backing store: */
unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE units, *not* PAGE_CACHE_SIZE */
struct file * vm_file; // Backing file; NULL pointer for a private anonymous mapping
void * vm_private_data; /* Points to private data associated with this memory area */
#ifndef CONFIG_MMU
struct vm_region *vm_region; /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
};
/*
 * Table of callbacks attached to a virtual memory area through its vm_ops
 * pointer. Each member is a function pointer invoked for one kind of event
 * on the area (creation, removal, page faults, NUMA policy queries, ...).
 */
struct vm_operations_struct {
void (*open)(struct vm_area_struct * area); // Called when the virtual memory area is created
void (*close)(struct vm_area_struct * area); // Called when the virtual memory area is deleted
int (*mremap)(struct vm_area_struct * area); // Called when the mremap system call moves the virtual memory area
int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); // When a virtual page of a file mapping is accessed but is not mapped to a physical page,
// a page fault is raised; the fault handler calls fault to read the file's data into the file's page cache
int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
pmd_t *, unsigned int flags); // Like fault, except that this method serves file mappings that use transparent huge pages
/* When a virtual page of a file mapping is read but is not mapped to a physical page, a page fault is
raised; besides reading in the file page being accessed, the fault handler also reads ahead the following
file pages and calls map_pages to allocate physical pages in the file's page cache.
NOTE(review): translated from the original note; confirm whether map_pages allocates pages or only
maps pages that are already in the page cache. */
void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
/* On the first write to a private file mapping a page fault is raised; the fault handler performs
copy-on-write and calls page_mkwrite to tell the file system that the page is about to become writable,
so the file system can check whether the write is allowed or wait for the page to reach a suitable state.
NOTE(review): translated from the original note; verify whether this hook fires for private (COW)
mappings or for shared writable mappings. */
int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
/* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
int (*pfn_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
/* called by access_process_vm when get_user_pages() fails, typically
* for use by special VMAs that can switch between memory and hardware
*/
int (*access)(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write);
/* Called by the /proc/PID/maps code to ask the vma whether it
* has a special name. Returning non-NULL will also cause this
* vma to be dumped unconditionally. */
const char *(*name)(struct vm_area_struct *vma);
#ifdef CONFIG_NUMA
/*
* set_policy() op must add a reference to any non-NULL @new mempolicy
* to hold the policy upon return. Caller should pass NULL @new to
* remove a policy and fall back to surrounding context--i.e. do not
* install a MPOL_DEFAULT policy, nor the task or system default
* mempolicy.
*/
int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
/*
* get_policy() op must add reference [mpol_get()] to any policy at
* (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure
* in mm/mempolicy.c will do this automatically.
* get_policy() must NOT add a ref if the policy at (vma,addr) is not
* marked as MPOL_SHARED. vma policies are protected by the mmap_sem.
* If no [shared/vma] mempolicy exists at the addr, get_policy() op
* must return NULL--i.e., do not "fallback" to task or system default
* policy.
*/
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
unsigned long addr);
#endif
/*
* Called by vm_normal_page() for special PTEs to find the
* page for @addr. This is useful if the default behavior
* (using pte_page()) would not find the correct page.
*/
struct page *(*find_special_page)(struct vm_area_struct *vma,
unsigned long addr);
};
“”
- 应用程序通常使用C标准库提供的函数malloc()申请内存。glibc库的内存分配器ptmalloc使用brk或mmap向内核以页为单位申请虚拟内存,然后把页划分成小内存块分配给应用程序。默认的阈值是128KB,如果应用程序申请的内存长度小于阈值,ptmalloc分配器使用brk向内核申请虚拟内存,否则ptmalloc分配器使用mmap向内核申请虚拟内存。
- 应用程序可以直接使用mmap向内核申请虚拟内存。
#include <sys/mman.h>
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
#include <sys/mman.h>
int munmap(void *addr, size_t len);
#include <sys/mman.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
/* One record of the array that main() places in the mapped file. */
typedef struct
{
/* Fixed 6-byte name buffer followed by an integer age. */
char name[6];
int age;
} people;
/*
 * Writer half of the shared file-mapping demo.
 *
 * Creates (or truncates) the file named by argv[1], extends it to hold
 * FILE_RECORDS `people` records, maps MAP_RECORDS records of it with
 * MAP_SHARED, fills every mapped record, waits 15 seconds so that another
 * process can map the same file and read the data, then unmaps.
 *
 * The mapping is intentionally left larger than the file, exactly as in the
 * original demo: records FILE_RECORDS..MAP_RECORDS-1 lie past end-of-file.
 * NOTE(review): per POSIX mmap, stores past EOF inside the last mapped page
 * are not written out to the file -- confirm on the target platform.
 *
 * Returns 0 on success, -1 on a usage error or any failed system call.
 */
int main(int argc, char **argv)
{
    enum { FILE_RECORDS = 5, MAP_RECORDS = 10 };
    int fd, i;
    people *p_map;
    char temp;

    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <file>\n",
                (argc > 0 && argv[0]) ? argv[0] : "mmap_write");
        return -1;
    }

    fd = open(argv[1], O_CREAT | O_RDWR | O_TRUNC, 00777);
    if (fd < 0)
    {
        fprintf(stderr, "open: %s\n", strerror(errno));
        return -1;
    }

    /* Writing a single NUL byte at the last offset grows the file to
     * exactly FILE_RECORDS records. */
    if (lseek(fd, (off_t)(sizeof(people) * FILE_RECORDS - 1), SEEK_SET) == (off_t)-1
        || write(fd, "", 1) != 1)
    {
        fprintf(stderr, "resize: %s\n", strerror(errno));
        close(fd);
        return -1;
    }

    p_map = mmap(NULL, sizeof(people) * MAP_RECORDS, PROT_READ | PROT_WRITE,
                 MAP_SHARED, fd, 0);
    if (p_map == MAP_FAILED)
    {
        fprintf(stderr, "mmap: %s\n", strerror(errno));
        close(fd);
        return -1;
    }

    /* The mapping stays valid after the descriptor is closed. */
    close(fd);

    temp = 'A';
    for (i = 0; i < MAP_RECORDS; i++)
    {
        /* temp is advanced before use, so the first record is named "B". */
        temp = temp + 1;
        p_map[i].name[0] = temp;
        p_map[i].name[1] = '\0';
        p_map[i].age = 30 + i;
    }
    printf("Initialize.\n");

    /* Keep the mapping alive long enough for the reader program to run. */
    sleep(15);

    if (munmap(p_map, sizeof(people) * MAP_RECORDS) != 0)
    {
        fprintf(stderr, "munmap: %s\n", strerror(errno));
        return -1;
    }
    printf("UMA OK.\n");
    return 0;
}
#include <sys/mman.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
/* One record of the array that main() reads back from the mapped file. */
typedef struct
{
/* Fixed 6-byte name buffer followed by an integer age. */
char name[6];
int age;
} people;
int main(int argc, char **argv)
{
int fd, i;
people *p_map;
fd = open(argv[1], O_CREAT | O_RDWR, 00777);
p_map = (people *)mmap(NULL, sizeof(people) * 10, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (p_map == (void *)-1)
{
fprintf(stderr, "mmap: %s
", strerror(errno));
return -1;
}
for (i = 0; i < 10; i++)
{
printf("name: %s age: %d
", (*(p_map + i)).name, (*(p_map + i)).age);
}
munmap(p_map, sizeof(people) * 10);
return 0;
}
审核编辑:陈陈
全部0条评论
快来发表一下你的评论吧 !