Hi,

I'm trying to convert a simple C program to assembly using NASM and NASMX. See the code below. The assembly version does not really work. The C version results in a fullscreen window showing a white pixel at point 100, 100. The assembly version creates a fullscreen window but there is no pixel visible. Also it seems that ChangeDisplaySettings does not change the display settings in the assembly version. My monitor shows a little symbol when the screen resolution changes. With the C version this symbol is shown. Not so with the assembly version.

I'm new to assembly programming so I guess there is something wrong with how I handle the parameters. I can't find documentation on NASMX so I'm not sure whether I initialize the structs correctly. What's wrong with the assembly program?

Would be great to get some tipps, pointers and links to resource I can use to fix the assembly program.

NASMX is great but I would rather not use it. My aim is to great the smallest possible windows executable creating a fullscreen window with an OpenGL context. I think with full control there are more switches I can use to shrink the code. How to setup these big structs for ChangeDisplaySettings and ChoosePixelFormat without NASMX?

Peace
Jan

The code of the C version:

#include <windows.h>
#include <GL/gl.h>

#ifdef __cplusplus
extern "C"
{
#endif
int  _fltused = 0;
#ifdef __cplusplus
}
#endif

#define XRES 1024
#define YRES 768

static PIXELFORMATDESCRIPTOR pfd = {
    0, // Size Of This Pixel Format Descriptor... BAD coding, nothing new, saves 6 bytes
    1, PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER, 32, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0
};

static DEVMODE dmScreenSettings = {
    "", 0, 0, sizeof(dmScreenSettings), 0, DM_PELSWIDTH|DM_PELSHEIGHT, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", 0, 0, XRES, YRES, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

void WINAPI WinMainCRTStartup() {

    ChangeDisplaySettings(&dmScreenSettings, CDS_FULLSCREEN);
    HDC hDC = GetDC(CreateWindow("edit", 0, WS_POPUP|WS_VISIBLE|WS_MAXIMIZE, 0, 0, 0, 0, 0, 0, 0, 0));
    SetPixelFormat(hDC, ChoosePixelFormat(hDC, &pfd), &pfd);
    wglMakeCurrent(hDC, wglCreateContext(hDC));
    ShowCursor(FALSE);
    glViewport(0, 0, XRES, YRES);
    glMatrixMode(0x1701);
    glLoadIdentity();
    glOrtho(0, XRES, 0, YRES, -1, 1);
    glMatrixMode(0x1700);
    glLoadIdentity();
    do {
        glColor3ub(255, 255, 255);
        glBegin(0x0000);
        glVertex2i(100, 100);
        glEnd();
        SwapBuffers(hDC);
        PeekMessageA(0, 0, 0, 0, PM_REMOVE);
    } while (!GetAsyncKeyState(VK_ESCAPE));
    ExitProcess(0);
}


And the assembly code:

%include 'nasmx-1.0\inc\nasmx.inc'
%include 'nasmx-1.0\inc\win32\windows.inc'
%include 'nasmx-1.0\inc\win32\kernel32.inc'
%include 'nasmx-1.0\inc\win32\user32.inc'
%include 'nasmx-1.0\inc\win32\gdi32.inc'
%include 'nasmx-1.0\inc\win32\opengl32.inc'

entry entrypoint

XRES    equ    1024
YRES    equ    786


proc    entrypoint, ptrdiff_t argcount, ptrdiff_t cmdline
locals none
    invoke  ChangeDisplaySettingsA, ptrdiff_t , 0x00000004
    invoke  CreateWindowExA, 0, szEdit, 0, WS_POPUP + WS_VISIBLE, 0, 0, XRES, YRES, 0, 0, 0, 0
    invoke  GetDC, eax
    mov    ptrdiff_t , eax
    invoke  ChoosePixelFormat, ptrdiff_t , pfd
    invoke  SetPixelFormat, ptrdiff_t , eax, pfd
    invoke  wglCreateContext, ptrdiff_t
    invoke  wglMakeCurrent, ptrdiff_t , eax
    invoke  ShowCursor, FALSE
    invoke  glViewport, 0, 0, XRES, YRES
    invoke  glMatrixMode, 0x1701
    invoke  glLoadIdentity
    invoke  glOrtho, 0, XRES, 0, YRES, -1, 1
    invoke  glMatrixMode, 0x1700
    invoke  glLoadIdentity
.intro_loop:
    invoke  glColor3ub, 255, 255, 255
    invoke  glBegin, 0x0000
    invoke  glVertex2i, 100, 100
    invoke  glEnd
    invoke  SwapBuffers, ptrdiff_t
    invoke  PeekMessageA, 0, 0, 0, 0, PM_REMOVE
    invoke  GetAsyncKeyState, VK_ESCAPE
    cmp    eax, dword 0
    je      .intro_loop
    invoke  ExitProcess, 0
endproc


    hDc:            reserve(ptrdiff_t) 1


    szEdit:        declare(NASMX_TCHAR) NASMX_TEXT("edit"), 0x0
    NASMX_ISTRUC pfd, PIXELFORMATDESCRIPTOR
        NASMX_AT nSize, 0
        NASMX_AT nVersion, 1
        NASMX_AT dwFlags, 33
        NASMX_AT iPixelType, 32
        NASMX_AT cColorBits, 0
        NASMX_AT cRedBits, 0
        NASMX_AT cRedShift, 0
        NASMX_AT cGreenBits, 0
        NASMX_AT cGreenShift, 0
        NASMX_AT cBlueBits, 0
        NASMX_AT cBlueShift, 0
        NASMX_AT cAlphaBits, 0
        NASMX_AT cAlphaShift, 0
        NASMX_AT cAccumBits, 0
        NASMX_AT cAccumRedBits, 0
        NASMX_AT cAccumGreenBits, 0
        NASMX_AT cAccumBlueBits, 0
        NASMX_AT cAccumAlphaBits, 32
        NASMX_AT cDepthBits, 0
        NASMX_AT cStencilBits, 0
        NASMX_AT cAuxBuffers, 0
        NASMX_AT iLayerType, 0
        NASMX_AT bReserved, 0
        NASMX_AT dwLayerMask, 0
        NASMX_AT dwVisibleMask, 0
        NASMX_AT dwDamageMask, 0
    NASMX_IENDSTRUC
    NASMX_ISTRUC dmScreenSettings, DEVMODE
        NASMX_AT dmDeviceName, ""
        NASMX_AT dmSpecVersion, 0
        NASMX_AT dmDriverVersion, 0
        NASMX_AT dmSize, 156
        NASMX_AT dmDriverExtra, 0
        NASMX_AT dmFields, 1572864
        NASMX_AT dmOrientation, 0
        NASMX_AT dmPaperSize, 0
        NASMX_AT dmPaperiLength, 0
        NASMX_AT dmPaperWidth, 0
        NASMX_AT dmScale, 0
        NASMX_AT dmCopies, 0
        NASMX_AT dmDefaultSource, 0
        NASMX_AT dmPrintQuality, 0
        NASMX_AT dmColor, 0
        NASMX_AT dmDuplex, 0
        NASMX_AT dmYResolution, 0
        NASMX_AT dmTTOption, 0
        NASMX_AT dmCollate, 0
        NASMX_AT dmFormName, ""
        NASMX_AT dmUnusedPadding, 0
        NASMX_AT dmBitsPerPel, 0
        NASMX_AT dmPelsWidth, XRES
        NASMX_AT dmPelsHeight, YRES
        NASMX_AT dmDisplayFlags, 0
        NASMX_AT dmDisplayFrequency, 0
        NASMX_AT dmICMMethod, 0
        NASMX_AT dmICMIntent, 0
        NASMX_AT dmMediaType, 0
        NASMX_AT dmDitherType, 0
        NASMX_AT dmReserved1, 0
        NASMX_AT dmReserved2, 0
        NASMX_AT dmPanningWidth, 0
        NASMX_AT dmPanningHeight, 0
    NASMX_IENDSTRUC
Posted on 2012-09-03 13:25:00 by parasight
I can't test the code at the moment, however the ampersand in the call to the ChangeDisplaySettings function in the C example means that you want the address of the structure. This means the assembly code should look something like this instead:

invoke  ChangeDisplaySettingsA, ptrdiff_t dmScreenSettings, 0x00000004


NASMX is great but I would rather not use it. My aim is to great the smallest possible windows executable creating a fullscreen window with an OpenGL context. I think with full control there are more switches I can use to shrink the code.


NASMX doesn't really add much overhead (if any) to your code. Most of the stuff that NASMX does is to provide preprocessor features that the assembler itself doesn't support (like nested structures and single-line invocations) out of box.

One important note, NASMX was never designed to be a learning tool or something for novice programmers. It was a method to provide rapid application development facilities with the Netwide Assembler. So if you are just learning, then it's not a bad idea to wade through all that calling convention stuff and get used to the native NASM syntax. Then when you become competent, use NASMX to allow you to develop ideas in less time.

Regards,
Bryant Keller
Posted on 2012-09-03 14:15:10 by Synfire
Bryant,

thanks much for your input!


I can't test the code at the moment, however the ampersand in the call to the ChangeDisplaySettings function in the C example means that you want the address of the structure. This means the assembly code should look something like this instead:

invoke  ChangeDisplaySettingsA, ptrdiff_t dmScreenSettings, 0x00000004


I've tried that variant as well. It does not work either. I guess there is something wrong with how I fill the screen settings struct. Later I'll disassemble the binary of my C version to see how the compiler does it.

NASMX doesn't really add much overhead (if any) to your code. Most of the stuff that NASMX does is to provide preprocessor features that the assembler itself doesn't support (like nested structures and single-line invocations) out of box.

Ok, that's good to know. What I want is no overhead at all. I don't want to 'waste' a single byte :) With the help of crinkler and certain compiler options the binary of the C version is 609 bytes. I think there could be room to save more bytes by doing it all 'by hand'.

One important note, NASMX was never designed to be a learning tool or something for novice programmers. It was a method to provide rapid application development facilities with the Netwide Assembler. So if you are just learning, then it's not a bad idea to wade through all that calling convention stuff and get used to the native NASM syntax. Then when you become competent, use NASMX to allow you to develop ideas in less time.

I never thought it was designed for novice programmers. That's ok for me and that's also why I want to avoid using it. I want to know how it works before I use tools or libraries to do the work for me. I found a basic program in NASM syntax using the Win32 API without NASMX. I'll try using it as a basis for my program.
Posted on 2012-09-04 01:43:25 by parasight
NASMX can and is used by both beginner and advanced users alike.  Beginning programmers are shielded from calling convention complexities and can thus concentrate more on the original problem they are trying to solve ( or learn from! ).  Advanced users use NASMX to automate repetitive coding tasks and not worry about introducing subtle bugs when doing some quick coding.  It is, after all, the Netwide Assembler Set of Macros eXtended. ;)

The beautiful thing about NASMX is that if you structure your code properly you can easily port between Windows and Linux by maintaining a small set of core files that handle OS specific functionality and make use of those routines from your main app.  It's probably the closest thing an assembly programmer can get to write-once run everywhere ( as long as everywhere means 32/64-bit AMD/Intel CPU chips, that is ;-).
Posted on 2012-09-04 11:15:33 by p1ranha
I got it working without NASMX and saved a few bytes. It's still bigger than the C version (compiled with the Microsoft compiler) though. Does anyone see room to save bytes without changing the functionality? Should I try MASM to see if it creates a smaller binary?

This is the code:

global  main

extern _glOrtho@48
extern _ChangeDisplaySettingsA@8
extern _CreateWindowExA@48
extern _GetDC@4
extern _ChoosePixelFormat@8
extern _SetPixelFormat@12
extern _wglCreateContext@4
extern _wglMakeCurrent@8
extern _ShowCursor@4
extern _glViewport@16
extern _glMatrixMode@4
extern _glLoadIdentity@0
extern _glColor3ub@12
extern _glBegin@4
extern _glVertex2i@8
extern _glEnd@0
extern _SwapBuffers@4
extern _PeekMessageA@20
extern _GetAsyncKeyState@4
extern _ExitProcess@4

XRES    equ    800
YRES    equ    600


main:
    push    4                                  ; CDS_FULLSCREEN
    push    dword dms
    call    _ChangeDisplaySettingsA@8
   
    push    0
    push    0
    push    0
    push    0
    push    0
    push    0
    push    0
    push    0
    push    91000000h                          ; WS_POPUP + WS_VISIBLE + WS_MAXIMIZE
    push    0
    push    szEdit
    push    0
    call    _CreateWindowExA@48
   
    push    eax
    call    _GetDC@4
    mov    dword , eax

    push    dword pfd
    push    eax
    call    _ChoosePixelFormat@8
   
    push    dword pfd
    push    eax
    push    dword
    call    _SetPixelFormat@12
   
    push    dword
    call    _wglCreateContext@4

    push    eax
    push    dword
    call    _wglMakeCurrent@8

    push    0
    call    _ShowCursor@4

    push    YRES
    push    XRES
    push    0
    push    0
    call    _glViewport@16
   
    push    0x1701
    call    _glMatrixMode@4

    call    _glLoadIdentity@0

    sub    esp, 8
    fld1
    fstp    qword
    sub    esp, 8
    fldz
    fstp    qword
    sub    esp, 8
    fld    qword
    fstp    qword
    sub    esp, 8
    fldz
    fstp    qword
    sub    esp, 8
    fld    qword
    fstp    qword
    sub    esp, 8
    fldz
    fstp    qword
    call    _glOrtho@48
   
    push    0x1700
    call    _glMatrixMode@4

    call    _glLoadIdentity@0

.intro_loop:
   
    push    0
    push    0
    push    255
    call    _glColor3ub@12
   
    push    0                                  ; GL_POINTS
    call    _glBegin@4
   
    push    300
    push    400
    call    _glVertex2i@8
   
    call    _glEnd@0
   
    push    dword
    call    _SwapBuffers@4
   
    push    1                                  ; PM_REMOVE
    push    0
    push    0
    push    0
    push    0
    call    _PeekMessageA@20
   
    push    1Bh                                ; VK_ESCAPE
    call    _GetAsyncKeyState@4
    cmp    eax, dword 0
    je      .intro_loop
   
    push    0
    call    _ExitProcess@4
    ret
   

    hDc:    resd 1


    dms:    dd 0, 0, 0, 0, 0, 0, 0, 0, 0, 156, 1572864, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 800, 600, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    pfd:    dd 65536, 33, 32, 0, 0, 2097152, 0, 0, 0
    r:      dq 800.0
    t:      dq 600.0
    szEdit: db 'e', 'd', 'i', 't', 0
Posted on 2012-09-04 16:38:36 by parasight

It's still bigger than the C version (compiled with the Microsoft compiler) though.


This right here tells me it's probably your code or linker options and not the assembler output.  Why don't you compare the disassembly of the compiler output to your source to find where you can make improvements?  Also, you may want to research Portable Executable ( PE ) header reduction if you really want to shrink your program by even more bytes.
Posted on 2012-09-04 18:08:46 by p1ranha
You push all those zeros.  You can get a smaller push encoding by zeroing out a register and pushing the "zeroed" register instead of a 0

cmp     eax, dword 0

can be changed to
test eax, eax
jz      .intro_loop


Your using hDc a few times, use a register instead.
Posted on 2012-09-04 19:20:23 by Gunner