Linux Memory Management: DMA - 伯乐在线 blog
Original source: linuxDOS

We all know about DMA, yet few of us actually program with it; the most common encounters are the ring buffers in NIC drivers, plus the DMA engines of individual devices. Let's take it apart below.
DMA moves data directly between device memory and system memory, with no CPU involvement.


To make driver development easier, the kernel already provides a set of DMA function interfaces.

DMA is tied to the hardware architecture, and Linux hides the hardware-specific parts behind these interfaces; trace the code if you want the details.

In the kernel's DMA-layer design, the per-platform differences in DMA buffer mapping are abstracted behind a kernel-defined set of DMA operations

include/linux/dma-mapping.h:

struct dma_map_ops {
    void* (*alloc)(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t gfp,
                struct dma_attrs *attrs);
    void (*free)(struct device *dev, size_t size,
             void *vaddr, dma_addr_t dma_handle,
             struct dma_attrs *attrs);
    int (*mmap)(struct device *, struct vm_area_struct *,
             void *, dma_addr_t, size_t, struct dma_attrs *attrs);
    int (*get_sgtable)(struct device *dev, struct sg_table *sgt, void *,
             dma_addr_t, size_t, struct dma_attrs *attrs);
    dma_addr_t (*map_page)(struct device *dev, struct page *page,
             unsigned long offset, size_t size,
             enum dma_data_direction dir,
             struct dma_attrs *attrs);
    void (*unmap_page)(struct device *dev, dma_addr_t dma_handle,
             size_t size, enum dma_data_direction dir,
             struct dma_attrs *attrs);
    int (*map_sg)(struct device *dev, struct scatterlist *sg,
         int nents, enum dma_data_direction dir,
         struct dma_attrs *attrs);
    void (*unmap_sg)(struct device *dev,
             struct scatterlist *sg, int nents,
             enum dma_data_direction dir,
             struct dma_attrs *attrs);
    void (*sync_single_for_cpu)(struct device *dev,
                 dma_addr_t dma_handle, size_t size,
                 enum dma_data_direction dir);
    void (*sync_single_for_device)(struct device *dev,
                 dma_addr_t dma_handle, size_t size,
                 enum dma_data_direction dir);
    void (*sync_sg_for_cpu)(struct device *dev,
                struct scatterlist *sg, int nents,
                enum dma_data_direction dir);
    void (*sync_sg_for_device)(struct device *dev,
                 struct scatterlist *sg, int nents,
                 enum dma_data_direction dir);
    int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
    int (*dma_supported)(struct device *dev, u64 mask);
    int (*set_dma_mask)(struct device *dev, u64 mask);
#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
    u64 (*get_required_mask)(struct device *dev);
#endif
    int is_phys;
};

which presents a single, uniform interface over the differing implementations.
The differences mainly come from caches; cache/DMA coherence is its own topic and is not covered in depth here.

Another commonly used function is dma_set_mask, which tells the kernel what address range the device can reach; many devices can only address a limited range.
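A minimal sketch of how a driver might set the mask at probe time (pdev here is a hypothetical struct pci_dev, not taken from the article):

/* Hedged sketch: restrict the device to 32-bit DMA addresses. */
if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) {
    dev_err(&pdev->dev, "no usable 32-bit DMA configuration\n");
    return -EIO;
}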

DMA mappings fall into three categories:

1. Coherent DMA mapping: dma_alloc_coherent (note the open issue: this does not cover the case where the buffer the driver uses was allocated by another module rather than by the driver itself).
Use it when the driver allocates the DMA buffer itself and the mapping's lifetime matches the module's.

Parameters:

(1) The return value is a kernel virtual address for the buffer, which the driver can use.
(2) The third argument, dma_handle, returns the buffer's corresponding bus (physical) address.
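A minimal sketch of the coherent API (dev and buf_size are placeholders, not from the article):

/* Hedged sketch: a coherent buffer that lives as long as the driver. */
dma_addr_t dma_handle;
void *vaddr;

vaddr = dma_alloc_coherent(dev, buf_size, &dma_handle, GFP_KERNEL);
if (!vaddr)
    return -ENOMEM;
/* the CPU uses vaddr, the device is programmed with dma_handle ... */
dma_free_coherent(dev, buf_size, vaddr, dma_handle);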

2. Streaming DMA mapping: dma_map_single
Typically used to map an existing kernel buffer, returning its bus (physical) address.
Use it when the driver must take a virtual address handed in from another module as its DMA buffer; the buffer must be linear (physically contiguous), and the API takes care of cache coherence.
The matching coherence interfaces are dma_sync_single_for_cpu/dma_sync_single_for_device.
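A sketch of the streaming API over a buffer handed in from elsewhere (dev, buf and len are placeholders):

/* Hedged sketch: map an existing kernel buffer for device reads. */
dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

if (dma_mapping_error(dev, dma))
    return -ENOMEM;
/* device DMA may now be started with 'dma'; if the CPU must
 * look at the buffer before the device is done: */
dma_sync_single_for_cpu(dev, dma, len, DMA_TO_DEVICE);
/* ... CPU access finished, hand ownership back to the device: */
dma_sync_single_for_device(dev, dma, len, DMA_TO_DEVICE);
/* once the device has consumed the buffer: */
dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);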

3. Scatter/gather mapping: dma_map_sg, which maps a list of non-contiguous buffers in one call.
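A sketch of the scatter/gather API (sgl and nents are placeholders; dma_map_sg may merge entries, so iterate over its return value, but unmap with the original nents):

/* Hedged sketch: map a whole scatterlist with one call. */
struct scatterlist *sg;
int i;
int mapped = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);

if (!mapped)
    return -ENOMEM;
for_each_sg(sgl, sg, mapped, i) {
    /* one hardware descriptor per mapped segment;
     * program_desc() is a hypothetical helper */
    program_desc(sg_dma_address(sg), sg_dma_len(sg));
}
dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);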

Beyond these we sometimes also need:
1. Bounce buffers: used when a CPU-side physical address is unsuitable for the device's DMA.
2. DMA pools: ordinary DMA mappings are whole multiples of a page; when a driver needs smaller coherent DMA buffers it can use a DMA pool, a slab-like mechanism created with dma_pool_create (see the sketch below).
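A sketch of the pool API with illustrative numbers (64-byte objects aligned to 16 bytes; the name "mydev_desc" is made up):

/* Hedged sketch: carve small coherent buffers out of a dma_pool. */
struct dma_pool *pool = dma_pool_create("mydev_desc", dev, 64, 16, 0);
dma_addr_t dma;
void *vaddr;

if (!pool)
    return -ENOMEM;
vaddr = dma_pool_alloc(pool, GFP_KERNEL, &dma);
if (!vaddr)
    return -ENOMEM;
/* use vaddr / dma like a tiny dma_alloc_coherent buffer ... */
dma_pool_free(pool, vaddr, dma);
dma_pool_destroy(pool);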

Now let's see how DMA is used concretely in a NIC driver, taking the kernel's e1000 driver as the example:
drivers/net/ethernet/intel/e1000/*
Ring buffer

DMA buffers cannot live in high memory. Because the device can only reach a limited address range, the mask is typically 32 bits and the buffers come from low memory by default.
The device works with physical (bus) addresses, while the driver code works with virtual addresses.

Let's look at how a packet is transmitted, in e1000_main.c:

e1000_xmit_frame (the full frame transmit path is not detailed here):

static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
                 struct net_device *netdev)
{
    struct e1000_adapter *adapter = netdev_priv(netdev);
    struct e1000_hw *hw = &adapter->hw;
    struct e1000_tx_ring *tx_ring;
    unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD;
    unsigned int max_txd_pwr = E1000_MAX_TXD_PWR;
    unsigned int tx_flags = 0;
    unsigned int len = skb_headlen(skb);
    unsigned int nr_frags;
    unsigned int mss;
    int count = 0;
    int tso;
    unsigned int f;
    /* This goes back to the question of how to logically map a tx queue
     * to a flow. Right now, performance is impacted slightly negatively
     * if using multiple tx queues. If the stack breaks away from a
     * single qdisc implementation, we can look at this again. */
    tx_ring = adapter->tx_ring;
    if (unlikely(skb->len <= 0)) {
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
    }
    /* On PCI/PCI-X HW, if packet size is less than ETH_ZLEN,
     * packets may get corrupted during padding by HW.
     * To WA this issue, pad all small packets manually.
     */
    if (skb->len < ETH_ZLEN) {
        if (skb_pad(skb, ETH_ZLEN - skb->len))
            return NETDEV_TX_OK;
        skb->len = ETH_ZLEN;
        skb_set_tail_pointer(skb, ETH_ZLEN);
    }
    mss = skb_shinfo(skb)->gso_size;
    /* The controller does a simple calculation to
     * make sure there is enough room in the FIFO before
     * initiating the DMA for each buffer. The calc is:
     * 4 = ceil(buffer len/mss). To make sure we don't
     * overrun the FIFO, adjust the max buffer len if mss
     * drops. */
    if (mss) {
        u8 hdr_len;
        max_per_txd = min(mss << 2, max_per_txd);
        max_txd_pwr = fls(max_per_txd) - 1;
        hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
        if (skb->data_len && hdr_len == len) {
            switch (hw->mac_type) {
                unsigned int pull_size;
            case e1000_82544:
                /* Make sure we have room to chop off 4 bytes,
                 * and that the end alignment will work out to
                 * this hardware's requirements
                 * NOTE: this is a TSO only workaround
                 * if end byte alignment not correct move us
                 * into the next dword */
                if ((unsigned long)(skb_tail_pointer(skb) - 1) & 4)
                    break;
                /* fall through */
                pull_size = min((unsigned int)4, skb->data_len);
                if (!__pskb_pull_tail(skb, pull_size)) {
                    e_err(drv, "__pskb_pull_tail "
                     "failed.\n");
                    dev_kfree_skb_any(skb);
                    return NETDEV_TX_OK;
                }
                len = skb_headlen(skb);
                break;
            default:
                /* do nothing */
                break;
            }
        }
    }
    /* reserve a descriptor for the offload context */
    if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
        count++;
    count++;
    /* Controller Erratum workaround */
    if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb))
        count++;
    count += TXD_USE_COUNT(len, max_txd_pwr);
    if (adapter->pcix_82544)
        count++;
    /* work-around for errata 10 and it applies to all controllers
     * in PCI-X mode, so add one more descriptor to the count
     */
    if (unlikely((hw->bus_type == e1000_bus_type_pcix) &&
            (len > 2015)))
        count++;
    nr_frags = skb_shinfo(skb)->nr_frags;
    for (f = 0; f < nr_frags; f++)
        count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->frags[f]),
                 max_txd_pwr);
    if (adapter->pcix_82544)
        count += nr_frags;
    /* need: count + 2 desc gap to keep tail from touching
     * head, otherwise try next time */
    if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, count + 2)))
        return NETDEV_TX_BUSY;
    if (unlikely((hw->mac_type == e1000_82547) &&
         (e1000_82547_fifo_workaround(adapter, skb)))) {
        netif_stop_queue(netdev);
        if (!test_bit(__E1000_DOWN, &adapter->flags))
            schedule_delayed_work(&adapter->fifo_stall_task, 1);
        return NETDEV_TX_BUSY;
    }
    if (vlan_tx_tag_present(skb)) {
        tx_flags |= E1000_TX_FLAGS_VLAN;
        tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT);
    }
    first = tx_ring->next_to_use;
    tso = e1000_tso(adapter, tx_ring, skb);
    if (tso < 0) {
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
    }
    if (likely(tso)) {
        if (likely(hw->mac_type != e1000_82544))
            tx_ring->last_tx_tso = true;
        tx_flags |= E1000_TX_FLAGS_TSO;
    } else if (likely(e1000_tx_csum(adapter, tx_ring, skb)))
        tx_flags |= E1000_TX_FLAGS_CSUM;
    if (likely(skb->protocol == htons(ETH_P_IP)))
        tx_flags |= E1000_TX_FLAGS_IPV4;
    if (unlikely(skb->no_fcs))
        tx_flags |= E1000_TX_FLAGS_NO_FCS;
    count = e1000_tx_map(adapter, tx_ring, skb, first, max_per_txd,
     nr_frags, mss);
    if (count) {
        netdev_sent_queue(netdev, skb->len);
        skb_tx_timestamp(skb);
        e1000_tx_queue(adapter, tx_ring, tx_flags, count);
        /* Make sure there is space in the ring for the next send. */
        e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2);
    } else {
        dev_kfree_skb_any(skb);
        tx_ring->buffer_info[first].time_stamp = 0;
        tx_ring->next_to_use = first;
    }
    return NETDEV_TX_OK;
}

After passing through the upper layers and the neighbour subsystem, the frame has reached the driver, with the data sitting in the memory the skb points to.
In the code:
tx_ring = adapter->tx_ring; // grab the transmit ring buffer

The key call is:
count = e1000_tx_map(adapter, tx_ring, skb, first, max_per_txd, nr_frags, mss);

What does it do?

static int e1000_tx_map(struct e1000_adapter *adapter,
            struct e1000_tx_ring *tx_ring,
            struct sk_buff *skb, unsigned int first,
            unsigned int max_per_txd, unsigned int nr_frags,
            unsigned int mss)
{
    struct e1000_hw *hw = &adapter->hw;
    struct pci_dev *pdev = adapter->pdev;
    struct e1000_buffer *buffer_info;
    unsigned int len = skb_headlen(skb);
    unsigned int offset = 0, size, count = 0, i;
    unsigned int f, bytecount, segs;
    i = tx_ring->next_to_use;
    while (len) {
        buffer_info = &tx_ring->buffer_info[i];
        size = min(len, max_per_txd);
        /* Workaround for Controller erratum --
         * descriptor for non-tso packet in a linear SKB that follows a
         * tso gets written back prematurely before the data is fully
         * DMA'd to the controller */
        if (!skb->data_len && tx_ring->last_tx_tso &&
         !skb_is_gso(skb)) {
            tx_ring->last_tx_tso = false;
            size -= 4;
        }
        /* Workaround for premature desc write-backs
         * in TSO mode. Append 4-byte sentinel desc */
        if (unlikely(mss && !nr_frags && size == len && size > 8))
            size -= 4;
        /* work-around for errata 10 and it applies
         * to all controllers in PCI-X mode
         * The fix is to make sure that the first descriptor of a
         * packet is smaller than 2048 - 16 - 16 (or 2016) bytes
         */
        if (unlikely((hw->bus_type == e1000_bus_type_pcix) &&
         (size > 2015) && count == 0))
         size = 2015;
        /* Workaround for potential 82544 hang in PCI-X. Avoid
         * terminating buffers within evenly-aligned dwords. */
        if (unlikely(adapter->pcix_82544 &&
         !((unsigned long)(skb->data + offset + size - 1) & 4) &&
         size > 4))
            size -= 4;
        buffer_info->length = size;
        /* set time_stamp *before* dma to help avoid a possible race */
        buffer_info->time_stamp = jiffies;
        buffer_info->mapped_as_page = false;
        buffer_info->dma = dma_map_single(&pdev->dev,
                         skb->data + offset,
                         size,    DMA_TO_DEVICE);
        if (dma_mapping_error(&pdev->dev, buffer_info->dma))
            goto dma_error;
        buffer_info->next_to_watch = i;
        len -= size;
        offset += size;
        count++;
        if (len) {
            i++;
            if (unlikely(i == tx_ring->count))
                i = 0;
        }
    }
    for (f = 0; f < nr_frags; f++) {
        const struct skb_frag_struct *frag;
        frag = &skb_shinfo(skb)->frags[f];
        len = skb_frag_size(frag);
        offset = 0;
        while (len) {
            unsigned long bufend;
            i++;
            if (unlikely(i == tx_ring->count))
                i = 0;
            buffer_info = &tx_ring->buffer_info[i];
            size = min(len, max_per_txd);
            /* Workaround for premature desc write-backs
             * in TSO mode. Append 4-byte sentinel desc */
            if (unlikely(mss && f == (nr_frags-1) && size == len && size > 8))
                size -= 4;
            /* Workaround for potential 82544 hang in PCI-X.
             * Avoid terminating buffers within evenly-aligned
             * dwords. */
            bufend = (unsigned long)
                page_to_phys(skb_frag_page(frag));
            bufend += offset + size - 1;
            if (unlikely(adapter->pcix_82544 &&
                 !(bufend & 4) &&
                 size > 4))
                size -= 4;
            buffer_info->length = size;
            buffer_info->time_stamp = jiffies;
            buffer_info->mapped_as_page = true;
            buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag,
                        offset, size, DMA_TO_DEVICE);
            if (dma_mapping_error(&pdev->dev, buffer_info->dma))
                goto dma_error;
            buffer_info->next_to_watch = i;
            len -= size;
            offset += size;
            count++;
        }
    }
    segs = skb_shinfo(skb)->gso_segs ?: 1;
    /* multiply data chunks by size of headers */
    bytecount = ((segs - 1) * skb_headlen(skb)) + skb->len;
    tx_ring->buffer_info[i].skb = skb;
    tx_ring->buffer_info[i].segs = segs;
    tx_ring->buffer_info[i].bytecount = bytecount;
    tx_ring->buffer_info[first].next_to_watch = i;
    return count;
dma_error:
    dev_err(&pdev->dev, "TX DMA map failed\n");
    buffer_info->dma = 0;
    if (count)
        count--;
    while (count--) {
        if (i == 0)
            i += tx_ring->count;
        i--;
        buffer_info = &tx_ring->buffer_info[i];
        e1000_unmap_and_free_tx_resource(adapter, buffer_info);
    }
    return 0;
}

Assume the packet carries no paged fragments. We then enter the first while (len) loop, which:

picks up buffer_info = &tx_ring->buffer_info[i];
and calls dma_map_single to set up a streaming mapping, i.e. it associates skb->data (a virtual address) with buffer_info->dma (a bus/physical address); operating on either address touches the same memory.

buffer_info->length = size;
        /* set time_stamp *before* dma to help avoid a possible race */
        buffer_info->time_stamp = jiffies;
        buffer_info->mapped_as_page = false;
        buffer_info->dma = dma_map_single(&pdev->dev,
                         skb->data + offset,
                         size,    DMA_TO_DEVICE);

Back in the main transmit function:

if (count) {
        netdev_sent_queue(netdev, skb->len);
        skb_tx_timestamp(skb);
        e1000_tx_queue(adapter, tx_ring, tx_flags, count);
        /* Make sure there is space in the ring for the next send. */
        e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2);
    }

e1000_tx_queue is then called to push the data out:

static void e1000_tx_queue(struct e1000_adapter *adapter,
             struct e1000_tx_ring *tx_ring, int tx_flags,
             int count)
{
    struct e1000_hw *hw = &adapter->hw;
    struct e1000_tx_desc *tx_desc = NULL;
    struct e1000_buffer *buffer_info;
    u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS;
    unsigned int i;
    ...
    i = tx_ring->next_to_use;
    while (count--) {
        buffer_info = &tx_ring->buffer_info[i];
        tx_desc = E1000_TX_DESC(*tx_ring, i);
        tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
        tx_desc->lower.data =
            cpu_to_le32(txd_lower | buffer_info->length);
        tx_desc->upper.data = cpu_to_le32(txd_upper);
        if (unlikely(++i == tx_ring->count)) i = 0;
    }
    tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd);
    /* txd_cmd re-enables FCS, so we'll re-disable it here as desired. */
    if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS))
        tx_desc->lower.data &= ~(cpu_to_le32(E1000_TXD_CMD_IFCS));
    /* Force memory writes to complete before letting h/w
     * know there are new descriptors to fetch. (Only
     * applicable for weak-ordered memory model archs,
     * such as IA-64). */
    wmb();
    tx_ring->next_to_use = i;
    writel(i, hw->hw_addr + tx_ring->tdt);
    /* we need this if more than one processor can write to our tail
     * at a time, it syncronizes IO on IA64/Altix systems */
    mmiowb();
}

Note how the mapping created earlier by dma_map_single is written into the descriptor:
tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
so transmission is driven entirely by the transmit descriptors.

Then comes a register write:
writel(i, hw->hw_addr + tx_ring->tdt);
after which the NIC fetches the tx descriptors on its own and sends the data out.

To summarize the flow:
1. The network stack calls the NIC's start_xmit(); in e1000 this is e1000_xmit_frame.
2. e1000_xmit_frame in turn calls e1000_tx_queue(adapter, tx_ring, tx_flags, count), where the queue is the transmit-descriptor queue.
3. After some checks, e1000_tx_queue finally executes writel(i, hw->hw_addr + tx_ring->tdt). The tdt in tx_ring->tdt stands for transmit descriptor tail; per the NIC's datasheet, writing the descriptor tail makes the NIC fetch descriptors automatically and send the packets out.

A descriptor mainly carries an address pointer and a length: the starting bus address of the packet to send, and its size. With those, the hardware can read the packet via DMA and transmit it. Other NICs use essentially the same descriptor structure.

The flow is now clear, but a few questions remain:
1. Where is tx_ring initialized?
2. How exactly does the NIC use the mapped DMA addresses to send the data out?

The tx ring is set up when e1000_open runs, which calls:

/**
 * e1000_setup_all_tx_resources - wrapper to allocate Tx resources
 *                  (Descriptors) for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 **/
int e1000_setup_all_tx_resources(struct e1000_adapter *adapter)
{
    int i, err = 0;
    for (i = 0; i < adapter->num_tx_queues; i++) {
        err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]);
        if (err) {
            e_err(probe, "Allocation for Tx Queue %u failed\n", i);
            for (i-- ; i >= 0; i--)
                e1000_free_tx_resources(adapter,
                            &adapter->tx_ring[i]);
            break;
        }
    }
    return err;
}
/**
 * e1000_setup_tx_resources - allocate Tx resources (Descriptors)
 * @adapter: board private structure
 * @txdr: tx descriptor ring (for a specific queue) to setup
 *
 * Return 0 on success, negative on failure
 **/
static int e1000_setup_tx_resources(struct e1000_adapter *adapter,
                 struct e1000_tx_ring *txdr)
{
    struct pci_dev *pdev = adapter->pdev;
    int size;
    size = sizeof(struct e1000_buffer) * txdr->count;
    txdr->buffer_info = vzalloc(size);
    if (!txdr->buffer_info) {
        e_err(probe, "Unable to allocate memory for the Tx descriptor "
         "ring\n");
        return -ENOMEM;
    }
    /* round up to nearest 4K */
    txdr->size = txdr->count * sizeof(struct e1000_tx_desc);
    txdr->size = ALIGN(txdr->size, 4096);
    txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
                    GFP_KERNEL);
    if (!txdr->desc) {
setup_tx_desc_die:
        vfree(txdr->buffer_info);
        e_err(probe, "Unable to allocate memory for the Tx descriptor "
         "ring\n");
        return -ENOMEM;
    }
    /* Fix for errata 23, can't cross 64kB boundary */
    if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) {
        void *olddesc = txdr->desc;
        dma_addr_t olddma = txdr->dma;
        e_err(tx_err, "txdr align check failed: %u bytes at %p\n",
         txdr->size, txdr->desc);
        /* Try again, without freeing the previous */
        txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size,
                        &txdr->dma, GFP_KERNEL);
        /* Failed allocation, critical failure */
        if (!txdr->desc) {
            dma_free_coherent(&pdev->dev, txdr->size, olddesc,
                     olddma);
            goto setup_tx_desc_die;
        }
        if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) {
            /* give up */
            dma_free_coherent(&pdev->dev, txdr->size, txdr->desc,
                     txdr->dma);
            dma_free_coherent(&pdev->dev, txdr->size, olddesc,
                     olddma);
            e_err(probe, "Unable to allocate aligned memory "
             "for the transmit descriptor ring\n");
            vfree(txdr->buffer_info);
            return -ENOMEM;
        } else {
            /* Free old allocation, new allocation was successful */
            dma_free_coherent(&pdev->dev, txdr->size, olddesc,
                     olddma);
        }
    }
    memset(txdr->desc, 0, txdr->size);
    txdr->next_to_use = 0;
    txdr->next_to_clean = 0;
    return 0;
}

Notice that it establishes a coherent DMA mapping:

txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size,
                &txdr->dma, GFP_KERNEL);

desc is a structure pointer whose layout mirrors the NIC's hardware descriptor format, defined in e1000_hw.h:

/* Transmit Descriptor */
struct e1000_tx_desc {
    __le64 buffer_addr;    /* Address of the descriptor's data buffer */
    union {
        __le32 data;
        struct {
            __le16 length;    /* Data buffer length */
            u8 cso;    /* Checksum offset */
            u8 cmd;    /* Descriptor control */
        } flags;
    } lower;
    union {
        __le32 data;
        struct {
            u8 status;    /* Descriptor status */
            u8 css;    /* Checksum start */
            __le16 special;
        } fields;
    } upper;
};

Let's trace the relationships:

skb->data               --- ring->buffer_info->dma    (streaming mapping)
ring->dma               --- ring->desc                 (coherent mapping)
ring->desc->buffer_addr --- ring->buffer_info->dma

So how does the NIC get hold of these DMA addresses?

/**
 * e1000_configure_tx - Configure 8254x Transmit Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Tx unit of the MAC after a reset.
 **/
static void e1000_configure_tx(struct e1000_adapter *adapter)
{
    u64 tdba;
    struct e1000_hw *hw = &adapter->hw;
    u32 tdlen, tctl, tipg;
    u32 ipgr1, ipgr2;
    /* Setup the HW Tx Head and Tail descriptor pointers */
    switch (adapter->num_tx_queues) {
    case 1:
    default:
        tdba = adapter->tx_ring[0].dma;
        tdlen = adapter->tx_ring[0].count *
            sizeof(struct e1000_tx_desc);
        ew32(TDLEN, tdlen);
        ew32(TDBAH, (tdba >> 32));
        ew32(TDBAL, (tdba & 0x00000000ffffffffULL));
        ew32(TDT, 0);
        ew32(TDH, 0);
        adapter->tx_ring[0].tdh = ((hw->mac_type >= e1000_82543) ? E1000_TDH : E1000_82542_TDH);
        adapter->tx_ring[0].tdt = ((hw->mac_type >= e1000_82543) ? E1000_TDT : E1000_82542_TDT);
        break;
    }
    ...
}

Clearly, the ring's DMA address is written into the NIC's DMA registers (TDBAL/TDBAH, with TDLEN/TDH/TDT alongside), so DMA also depends on support from the NIC hardware.

The e1000 driver is certainly complex, but it puts both coherent and streaming mappings to work.
