Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. ; Splits command line to argv array.
  2. ; Uses standard Windows rules:
  3. ; * in normal mode, arguments are separated with spaces and tabs,
  4. ;   duplicate spaces and tabs are ignored
  5. ;   (two sequential spaces are the same as one);
  6. ; * unescaped quote " in normal mode starts quoted mode,
  7. ;   it does not end the current argument, it is not included in the argument;
  8. ; * spaces and tabs in quoted mode are included in the argument as is;
  9. ; * unescaped quote " in quoted mode returns to normal mode,
  10. ;   it does not end the current argument, it is not included in the argument;
  11. ; * quotes can be escaped with backslashes \ in both modes
  12. ;   (the recommended way), \" means copying " to the argument
  13. ;   without switching modes;
  14. ; * backslashes not before a quote are just regular characters,
  15. ;   backslashes before a quote should be escaped by another backslash:
  16. ;   " means unescaped quote
  17. ;   \" means character "
  18. ;   \\" means character \ plus unescaped quote
  19. ;   \\\" means characters \"
  20. ;   and so on;
  21. ; * quotes in quoted mode can also be escaped by doubling them, ""
  22. ;   (the confusing way); note that in normal mode "" means empty argument.
  23. ; For example, the command line
  24. ; begin"quoted mode"end\ \"escaped" "quotes" "1\" "" """escaped quotes 2"""
  25. ; has 4 arguments:
  26. ; 1) beginquoted modeend\
  27. ; 2) "escaped quotes 1"
  28. ; 3)
  29. ; 4) "escaped quotes 2"
  30. ; The recommended way to create a command line with the same arguments:
  31. ; "beginquoted modeend"\ "\"escaped quotes 1\"" "\"escaped quotes 2\"".
  32. ;
  33. ; in: esi -> command line
  34. ; in: edi -> data for arguments, maybe null
  35. ; in: edx -> pointers to arguments, maybe null
  36. ; out: ebx = argument count
  37. ;
  38. ; There are too many branches and labels here,
  39. ; so some of them are isolated into macros.
  40. macro start_arg
  41. ; Begins a new argument: increments the argument count in ebx;
  42. ; if arguments are tracked (edx is nonzero), stores the current output address edi at [edx] and advances edx by 4.
  43. {
  44. local .label
  45.         test    edx, edx                ; edx = 0: the caller did not ask for argument pointers
  46.         jz      .label
  47.         mov     [edx], edi              ; record where the data of this argument will start
  48.         add     edx, 4                  ; move to the next dword pointer slot
  49. .label:
  50.         inc     ebx                     ; the argument is counted in either case
  51. }
  52. ; In typical cases decoded arguments and input line have large chunks in common.
  53. ; When going through the input string, we do not copy arguments immediately,
  54. ; but track size of last chunk that should be copied instead.
  55. ; This macro copies the last chunk of data (the ecx bytes just before the current character [esi-1]) if arguments are tracked.
  56. ; If arguments are tracked (edi is nonzero), ecx is reset to zero and edi is advanced past the copied bytes;
  57. ; otherwise, we do not care about ecx. esi is the same on exit as on entry. NOTE(review): rep movsb assumes DF = 0 - confirm with callers.
  58. macro copy_arg_data
  59. {
  60. local .label
  61.         test    edi, edi                ; edi = 0: no output buffer, so there is nothing to copy
  62.         jz      .label
  63.         dec     esi                     ; esi -> current character (esi pointed past it on entry)
  64.         sub     esi, ecx                ; esi -> first byte of the pending chunk
  65.         rep movsb                       ; copy ecx bytes to [edi]; ecx becomes 0, esi is back at the current character
  66.         inc     esi                     ; esi again points past the current character
  67. .label:
  68. }
  69. ; Process a run of backslashes. The code below expects al = '\' = [esi-1] on entry; on exit al is the first character not consumed by the run and esi points past it.
  70. macro process_slash
  71. {
  72. ; 1. Count number of backslashes.
  73. local .label1, .label2
  74.         xor     ecx, ecx                ; ecx = number of backslashes seen so far
  75. .label1:
  76.         inc     ecx                     ; count the backslash that is currently in al
  77.         mov     al, byte [esi]          ; fetch the next input character
  78.         inc     esi
  79.         cmp     al, '\'
  80.         jz      .label1                 ; keep counting while the run continues
  81. ; 2. If the next character is not ", backslash is a regular character;
  82. ; copy all of them.
  83.         cmp     al, '"'
  84.         jnz     .label2                 ; ecx = full run length, al = the character after the run
  85. ; 3. If the next character is ", then only half of backslashes
  86. ; should be copied, other are escaping characters.
  87. ; If number of backslashes is odd, include " to copied chunk
  88. ; and advance to the next character.
  89.         shr     ecx, 1                  ; ecx = half of the count rounded down; CF = 1 if the count was odd
  90.         jnc     .label2                 ; even count: the quote stays in al as an unescaped quote
  91.         mov     al, byte [esi]          ; odd count: the quote is literal, so load the character after it
  92.         inc     esi
  93.         inc     ecx                     ; make the chunk one byte longer so that it ends with the quote
  94. .label2:
  95.         copy_arg_data                   ; copies the ecx bytes that precede the current character
  96. }
  97.  
  98. ; Parser procedure: splits the zero-terminated command line at esi into arguments and returns their count in ebx.
  99. proc parse_cmdline
  100. ; Registers (none of them is saved or restored by the code below):
  101. ; ebx = argc = argument count
  102. ; ecx = size of last chunk if edi is nonzero, garbage otherwise
  103. ; al = current input character = [esi-1]
  104. ; esi = pointer to input past the current character
  105. ; edi = zero or pointer to the next output data
  106. ; edx = zero or pointer to the next output pointer
  107.         xor     ebx, ebx                ; no arguments found yet
  108.         xor     ecx, ecx                ; no pending chunk yet
  109. ; There are two large blocks of code for normal and quoted modes.
  110. ; We start in normal mode.
  111. ; 1. Processing in normal mode.
  112. ; 1a. Skip initial spaces and tabs.
  113. .skip_spaces:
  114.         mov     al, byte [esi]
  115.         inc     esi
  116.         cmp     al, ' '
  117.         jz      .skip_spaces
  118.         cmp     al, 9                   ; 9 = tab
  119.         jz      .skip_spaces
  120. ; 1b. If the command line has ended, exit.
  121.         test    al, al
  122.         jz      .done                   ; no argument is open here, so nothing has to be terminated
  123. ; 1c. Any character in this state starts a new argument.
  124.         start_arg
  125. ; 1d. Loop over the input string, watching for one of:
  126. ; (space), (tab), (terminator), ", \
  127. ; All other characters should be copied as is.
  128. ; The first character here cannot be (space), (tab) or (terminator),
  129. ; but " and \ are possible. For these, skip 1e, because we have nothing
  130. ; to copy yet, and go directly where 1f would direct us.
  131.         cmp     al, '"'
  132.         jz      .enter_quoted_mode
  133.         cmp     al, '\'
  134.         jz      .slash_normal
  135. .normal_mode:
  136.         inc     ecx                     ; the current character joins the pending chunk
  137. .enter_normal_mode:
  138.         mov     al, byte [esi]          ; entry that fetches the next character without growing the chunk
  139.         inc     esi
  140. .reenter_normal_mode:                   ; entry used when al already holds the current character
  141.         cmp     al, ' '
  142.         jz      .copydata
  143.         cmp     al, 9
  144.         jz      .copydata
  145.         test    al, al
  146.         jz      .copydata
  147.         cmp     al, '\'
  148.         jz      .copydata
  149.         cmp     al, '"'
  150.         jnz     .normal_mode            ; ordinary character: extend the chunk and continue
  151. .copydata:
  152. ; 1e. Copy the found chunk.
  153.         copy_arg_data
  154. ; 1f. One of (space), (tab), (terminator), ", \ is found.
  155. ; For terminator, end the current argument and exit.
  156. ; For \, go to 1h.
  157. ; For ", switch to quoted mode.
  158.         test    al, al
  159.         jz      .done_termarg
  160.         cmp     al, '\'
  161.         jz      .slash_normal
  162.         cmp     al, '"'
  163.         jz      .enter_quoted_mode
  164. ; 1g. If we are here, (space) or (tab) has occurred in 1d.
  165. ; End the current argument and restart processing from 1a.
  166.         test    edi, edi
  167.         jz      .skip_spaces            ; no output buffer: there is nothing to terminate
  168.         mov     byte [edi], 0           ; terminate the argument data with a zero byte
  169.         inc     edi
  170.         jmp     .skip_spaces
  171. .done_termarg:                          ; input ended while an argument was open
  172.         test    edi, edi
  173.         jz      .done
  174.         mov     byte [edi], 0           ; terminate the last argument with a zero byte
  175.         inc     edi
  176. .done:
  177.         ret
  178. .slash_normal:
  179. ; 1h. Process chunk of slashes with possible ending " if escaped
  180. ; as described in process_slash macro.
  181. ; After that, return to loop in 1d; note that the next character can be space.
  182.         process_slash
  183.         jmp     .reenter_normal_mode    ; al already holds the character that follows the run
  184. ; 2. Processing in quoted mode.
  185. ; This block is simpler because the current argument never ends in quoted mode,
  186. ; except when the input ends.
  187. ; 2a. Loop over the input string, watching for one of:
  188. ; (terminator), ", \.
  189. .quoted_mode:
  190.         inc     ecx                     ; the current character joins the pending chunk
  191. .enter_quoted_mode:
  192.         mov     al, byte [esi]          ; entry that fetches the next character without growing the chunk
  193.         inc     esi
  194. .reenter_quoted_mode:                   ; entry used when al already holds the current character
  195.         test    al, al
  196.         jz      .copydata2
  197.         cmp     al, '\'
  198.         jz      .copydata2
  199.         cmp     al, '"'
  200.         jnz     .quoted_mode            ; any other character, spaces and tabs included, extends the chunk
  201. .copydata2:
  202. ; 2b. Copy the found chunk.
  203.         copy_arg_data
  204. ; 2c. One of (terminator), ", \ is found.
  205. ; For terminator, end the current argument and exit.
  206. ; For \, go to 2d.
  207.         test    al, al
  208.         jz      .done_termarg
  209.         cmp     al, '\'
  210.         jz      .slash_quoted
  211. ; For ", check whether the next character is also ":
  212. ; for a single quote, switch to the normal mode 1d,
  213. ; for a double quote, skip the first quote
  214. ; and start a new chunk from the second one.
  215.         cmp     byte [esi], '"'         ; look at the character after the quote without consuming it
  216.         jnz     .enter_normal_mode
  217. .double_quote:
  218.         inc     esi                     ; esi now points past the second quote
  219.         jmp     .quoted_mode            ; the inc ecx there counts the second quote as the first byte of the new chunk
  220. .slash_quoted:
  221. ; 2d. Process chunk of slashes with possible ending " if escaped
  222. ; as described in process_slash macro.
  223. ; After that, return to loop in 2a.
  224.         process_slash
  225.         jmp     .reenter_quoted_mode    ; al already holds the character that follows the run
  226. endp
  227. purge start_arg                         ; the helper macros are needed only by parse_cmdline above,
  228. purge copy_arg_data                     ; so their definitions are removed here
  229. purge process_slash                     ; to keep the names from leaking into the code that follows
  230.