现代X86汇编-C和ASM混合编程举例

发布于:2024-06-14 ⋅ 阅读:(113) ⋅ 点赞:(0)

端午假期安装好了vs c++2022,并写了个简单的汇编代码,证明MASM真的可以运行。今天需要搞一个实实在在的C++和ASM混合编程的例子,因为用纯汇编的求伯君写WPS的时代一去不复返了。个别关键函数用汇编,充分发挥CPU的特色功能,偶尔还是需要的。

昨天找的随书代码的位置在github上:GitHub - Apress/modern-x86-assembly-language-programming-3e: Source Code for 'Modern X86 Assembly Language Programming' by Daniel Kusswurm

这是第三版,最新的书。又从z-liabrary上下载了这本英文书,导入微信读书,自动翻译为中文,z-libary加微信读书,使我实现了读书ziyou(啥时候财务ziyou,还远)。

这本书的附录A就举了怎样在vs2022环境建立一个C++加ASM的例子,今天咱们就逐步跟着书上学这个例子。

首先创建project

• Create a C++ project• Enable MASM support• Add an assembly language file• Set project properties•Edit the source code• Build and run the project

  1. 启动VS
  2. New Project
  3. Select Console App
  4. Project name:Example1
  5. Solution name:TestSolution
  6. Create
  7. Build>Configuration Manager,choose <Edit...>
  8. select X86, Remove--我的环境是Win32

其次,配置ASM环境的步骤

  1. View>Solution Explorer
  2. rigtht-click Example1 and select Build Denpendencies>Build Customizations
  3. check masm
  4. Add New Item
  5. select .cpp for the file style
  6. Example1_fasm.asm Add

第三步是设置project属性

  1. Example1  and select Properties
  2. All Configurations   All Platforms
  3. C/C++>Code Generation Set to Advanced Vector Extentions(/arch:AVX) or AVX2 or AVX512
  4. C/C++>Output change to Files Assembly Machine and Source Code(/FAcs)
  5. Microsoft Macro Assembler>Listing File  Enable Assembly Generated Code Listing to Yes(/Sg)
  6. Change the Assembled Code Listing File text filed to $(IntDir)\%(filename).lst
  7. Click OK

$(IntDir)\%(filename).lst  --这是1还是L?

最后一步就是写源码了

  1. AppendixA\TestSolution\Example1\Example1.cpp
  2. AppendixA\TestSolution\Example1\Example1_fasm.asm

Example1.cpp

#include <iostream>
#include <iomanip>
#include <string>
#include <cmath>

extern "C" void CalcZ_avx(float* z, const float* x, const float* y, size_t n);

static void CalcZ_cpp(float* z, const float* x, const float* y, size_t n)
{
    for (size_t i = 0; i < n; i++)
        z[i] = x[i] + y[i];
}

int main(void)
{
    constexpr size_t n = 20;
    float x[n], y[n], z1[n], z2[n];

    // Initialize the data arrays
    for (size_t i = 0; i < n; i++)
    {
        x[i] = i * 10.0f + 10.0f;
        y[i] = i * 1000.0f + 1000.0f;
        z1[i] = z2[i] = 0.0f;
    }

    // Exercise the calculating functions
    CalcZ_cpp(z1, x, y, n);
    CalcZ_avx(z2, x, y, n);

    // Display the results
    constexpr char nl = '\n';
    constexpr size_t w = 10;
    constexpr float eps = 1.0e-6f;

    std::cout << std::fixed << std::setprecision(1);

    std::cout << std::setw(w) << "i";
    std::cout << std::setw(w) << "x";
    std::cout << std::setw(w) << "y";
    std::cout << std::setw(w) << "z1";
    std::cout << std::setw(w) << "z2" << nl;
    std::cout << std::string(50, '-') << nl;

    for (size_t i = 0; i < n; i++)
    {
        std::cout << std::setw(w) << i;
        std::cout << std::setw(w) << x[i];
        std::cout << std::setw(w) << y[i];
        std::cout << std::setw(w) << z1[i];
        std::cout << std::setw(w) << z2[i] << nl;

        if (fabs(z1[i] - z2[i]) > eps)
        {
            std::cout << "Compare error!\n";
            break;
        }
    }

}

Example1_fasm.asm

;------------------------------------------------------------------------------
; Example1_fasm.asm
;------------------------------------------------------------------------------

;------------------------------------------------------------------------------
; void CalcZ_avx(float* z, const float* x, const float* x, size_t n);
;------------------------------------------------------------------------------

NSE     equ 8                                   ;num_simd_elements
SF      equ 4                                   ;scale factor for F32

        .code
CalcZ_avx proc

; Validate arguments
        test r9,r9                              ;n == 0?
        jz Done                                 ;jump if yes

; Initialize
        mov rax,-SF                             ;rax = array offset (Loop2)
        cmp r9,NSE                              ;n < NSE?
        jb Loop2                                ;jump if yes
        mov rax,-NSE*SF                         ;rax = array offset (Loop1)

; Calculate z[i:i+7] = x[i:i+7] + y[i:i+7]
Loop1:  add rax,NSE*SF                          ;update array offset
        vmovups ymm0,ymmword ptr [rdx+rax]      ;ymm0 = x[i:i+7]
        vmovups ymm1,ymmword ptr [r8+rax]       ;ymm1 = y[i:i+7]
        vaddps ymm2,ymm0,ymm1                   ;z[i:i+7] = x[i:i+7] + y[i:i+7]
        vmovups ymmword ptr [rcx+rax],ymm2      ;save z[i:i+7]

        sub r9,NSE                              ;n -= NSE
        cmp r9,NSE                              ;n >= NSE?
        jae Loop1                               ;jump if yes

        test r9,r9                              ;n == 0?
        jz Done                                 ;jump if yes
        add rax,NSE*SF-SF                       ;adjust array offset for Loop2

; Calculate z[i] = x[i] + y[i] for remaining elements
Loop2:  add rax,SF                              ;update array offset
        vmovss xmm0,real4 ptr [rdx+rax]         ;xmm0 = x[i]
        vmovss xmm1,real4 ptr [r8+rax]          ;xmm1 = y[i]
        vaddss xmm2,xmm0,xmm1                   ;z[i] = x[i] + y[i]
        vmovss real4 ptr [rcx+rax],xmm2         ;save z[i]

        sub r9,1                                ;n -= 1
        jnz Loop2                               ;repeat until done

Done:   vzeroupper
        ret                                     ;return to caller
CalcZ_avx endp
        end

最终构建运行即可

代码有点高大上,估计是用了AVX,两个loop同时运行。慢慢看书了解含义吧,还挺复杂的。

这个例子太高深了,再举个简单的例子,把数组倒序输出。


网站公告

今日签到

点亮在社区的每一天
去签到