;---- urldecode.asm ----------------------------------------------------------;
;
; URL decode a sequence of octets (RFC 1738).
;
; The sequence comes from stdin or command line arguments. The output
; is sent to stdout.
;
; Options:
;
;	-e - end of options
;	-h - help
;	-l - append a new line
;	-p - decode + into space
;	-r - version
;
; This code, when assembled and linked, will work under FreeBSD, and perhaps
; other BSD systems. It requires NASM for assembly:
;
;	nasm -f elf urldecode.asm
;	ld -o urldecode urldecode.o
;	strip urldecode
;
; Started:	23 Oct 2000
; Updated:	25 Oct 2000
;
; Version 1.0
;
; Copyright (c) 2000 G. Adam Stanislav
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; 1. Redistributions of source code must retain the above copyright
;    notice, this list of conditions and the following disclaimer.
; 2. Redistributions in binary form must reproduce the above copyright
;    notice, this list of conditions and the following disclaimer in the
;    documentation and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
; OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
; LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
; OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
; SUCH DAMAGE.
;
;-----------------------------------------------------------------------------;

; Size in bytes of the input buffer and of the output buffer.
%define	BUFSIZE	2048

; Bit flags kept in AH for the whole run of the program.
;	PFLAG - option -p: decode `+' into a space
;	PERC  - a `%' has been consumed and the character after it is
;	        being fetched (lets read.exit emit the dangling `%')
;	LFLAG - option -l: append a new line at end of output
;	EFLAG - option -e: treat every following argument as a string
%define	PFLAG	(1 << 0)
%define	PERC	(1 << 1)
%define	LFLAG	(1 << 2)
%define	EFLAG	(1 << 3)

section	.bss

; Raw input read from stdin; the read routine points ESI at its start.
ibuffer:	resb	BUFSIZE

; Decoded output waiting to be written; _start points EDI at its start.
obuffer:	resb	BUFSIZE

section	.data
; Message written to stderr by read.errexit; rerrlen is its length in bytes.
rerr	db	'URLDECODE: Read error.', 0Ah
rerrlen	equ	$-rerr
align 4
; Message written to stderr by write.errexit; werrlen is its length in bytes.
werr	db	'URLDECODE: Write error.', 0Ah
werrlen	equ	$-werr
align 4
; The version banner, the usage text and the extra help paragraph are stored
; back to back on purpose so that three overlapping lengths can be used:
;
;	rmsg, rlen - version banner only			(option -r)
;	umsg, ulen - usage line and option list only		(invalid option)
;	rmsg, hlen - everything from rmsg to the end		(option -h)
;
; Do not reorder these lines or insert data between them.
rmsg	db	'URLDECODE version 1.0 (25 Oct 2000)', 0Ah
	db	'Copyright 2000 G. Adam Stanislav', 0Ah
	db	'All rights reserved.', 0Ah
rlen	equ	$-rmsg
	; Blank line separating the banner from the usage text in -h output.
	db	0Ah
umsg	db	'Usage: urldecode [options] [string ...]', 0Ah, 0Ah
	db	09h, '-e = decode rest of arguments; exit if [string ...] is missing', 0Ah
	db	09h, '-h = help', 0Ah
	db	09h, '-l = append a new line', 0Ah
	db	09h, "-p = decode `+' into spaces", 0Ah
	db	09h, '-r = version', 0Ah, 0Ah
ulen	equ	$-umsg
	; The paragraph below is part of the -h output only (covered by hlen,
	; not by ulen).
	db	"If no [string ...] is specified, input comes from stdin unless the `-e' option", 0Ah
	db	'is used.', 0Ah, 0Ah
hlen	equ	$-rmsg

; The code has to live in `.text'. NASM's ELF back end only knows a fixed
; set of section names; any other name (such as `.code') silently gets the
; default attributes "noexec, align=1", which puts the program into a
; non-executable, byte-aligned section.
section	.text

decode:
	; Decode one input character and queue the result for output.
	;
	; In:	AL      = character just fetched from the input
	;	AH      = flag bits; PFLAG selects `+' -> space
	;	ESI/EBX = input pointer / input count (see getchar)
	;	EDI/ECX = output pointer / output count (see putchar)
	; Out:	the result is appended through putchar, whose RET also
	;	returns to our caller.
	; Uses:	AL, DL, DH; PERC in AH is set around the first fetch only.
	;
	; A `%' must be followed by two hex digits (upper or lower case).
	; If the first or the second character after it is not a hex digit,
	; the `%' (and the first digit, if there was one) is emitted
	; literally and the offending character is pushed back onto the
	; input with INC EBX / DEC ESI.
	;
	; NOTE(review): when stdin ends right after `%' plus ONE hex digit,
	; the second getchar below ends in read.exit with PERC already
	; cleared, so neither the `%' nor that digit is written.

	cmp	al, '%'
	je	.escape

	cmp	al, '+'
	jne	.emit			; Ordinary character, copy as is

	test	ah, PFLAG
	jz	.emit			; `+' stays `+' without -p

	mov	al, ' '

.emit:
	jmp	putchar

.escape:
	; PERC tells read.exit that a `%' is pending should the input end
	; here. getchar is safe for command line input as well: EBX is so
	; large in that mode that it never falls back to "read".
	or	ah, PERC
	call	getchar
	and	ah, ~PERC

	sub	dl, dl
	mov	dh, al			; Raw first character, needed again
					; if the second one is not hex

	; First digit: AL -> 0..15, or bail out.
	sub	al, '0'
	cmp	al, 9
	jbe	.first_ok		; '0'..'9'

	add	al, '0'			; Undo, then fold A-F onto a-f
	or	al, 20h
	sub	al, 'a'
	cmp	al, 'f' - 'a'
	ja	.not_hex1

	add	al, 10

.first_ok:
	mov	dl, al			; High nibble value
	call	getchar

	; Second digit, same test.
	sub	al, '0'
	cmp	al, 9
	jbe	.combine

	add	al, '0'
	or	al, 20h
	sub	al, 'a'
	cmp	al, 'f' - 'a'
	ja	.not_hex2

	add	al, 10
	jmp	short .combine

.not_hex1:
	; `%' followed by a non-hex character: print the `%' and
	; "unget" the character so it is processed on its own.
	mov	al, '%'
	inc	ebx
	dec	esi
	jmp	putchar

.not_hex2:
	; `%', one hex digit, then a non-hex character: "unget" the last
	; character, print the `%' and the digit exactly as they came in.
	inc	ebx
	dec	esi
	mov	al, '%'
	call	putchar
	mov	al, dh
	jmp	putchar

.combine:
	shl	dl, 4
	or	al, dl			; AL = decoded octet

	; Fall through to putchar
align 4
putchar:
	; Append AL to the output buffer.
	;
	; In:	AL  = character to store
	;	EDI = next free byte in the output buffer
	;	ECX = number of bytes already pending
	; The buffer is handed to write as soon as it holds BUFSIZE bytes
	; or the character just stored is a line feed; otherwise we simply
	; return with EDI and ECX advanced by one.

	stosb
	inc	ecx

	cmp	ecx, BUFSIZE
	jae	write			; Buffer full

	cmp	al, 0Ah
	je	write			; End of line, flush now

	ret

write:
	; Send the contents of the output buffer to stdout.
	;
	; In:	ECX = number of pending bytes
	;	EDI = one past the last pending byte, i.e. the data starts
	;	      at EDI - ECX
	; Out:	ECX = 0 and EDI = start of the (now empty) buffer
	;	EAX and EDX are preserved.
	; On failure this routine does not return: it prints werr to stderr
	; and exits with status 2.
	;
	; System calls use the FreeBSD convention: arguments on the stack
	; below one filler dword, failure reported in the carry flag with a
	; positive errno in EAX. Testing the sign of EAX can therefore never
	; detect an error, so the carry flag is checked immediately after
	; the int 80h, before any other instruction can change it. A short
	; write is retried until the whole buffer has gone out.

	jecxz	.done			; Empty buffer, do nothing

	push	eax			; Caller's AL/AH
	sub	edi, ecx		; Find start of buffer
	push	edi			; ... and remember it for the exit

.more:
	push	ecx			; Number of bytes still to write
	push	edi			; Where they start
	push	dword 1			; stdout = 1
	push	edx			; Filler dword; also saves EDX
	mov	eax, 4			; SYS_write
	int	80h
	jc	.errexit		; Carry set = the call failed

	pop	edx
	add	esp, byte 8		; Drop fd and buffer address
	pop	ecx			; Byte count we asked for

	or	eax, eax
	jz	.errexit		; No progress at all: give up
					; rather than loop forever

	add	edi, eax		; Skip what has been written
	sub	ecx, eax
	jnz	.more			; Short write, send the rest

	pop	edi			; Buffer is now empty (ECX = 0)
	pop	eax

.done:
	ret

align 4
.errexit:
	; Report the failure on stderr. The stack is not cleaned up
	; because we leave through SYS_exit.
	sub	eax, eax
	push	dword werrlen
	mov	al, 2			; stderr
	push	dword werr
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	sub	eax, eax
	mov	al, 2			; return failure
	push	eax
	push	edx
	dec	al			; SYS_exit
	int	80h

align 4
getchar:
	; Fetch the next input character.
	;
	; In:	ESI = address of the next input byte
	;	EBX = number of input bytes left at ESI
	; Out:	AL  = the character; ESI advanced, EBX decremented
	;
	; When EBX is 0 the buffer is refilled through read first (read
	; does not return at end of input). For command line input EBX
	; starts out as all ones, so the refill is not triggered there.

	test	ebx, ebx
	jnz	.ready
	call	read

.ready:
	dec	ebx
	lodsb
	ret

align 4
read:
	; Refill the input buffer from stdin.
	;
	; Out:	ESI = ibuffer
	;	EBX = number of bytes read (always > 0 when we return)
	;	EAX, ECX and EDX are preserved.
	; Does not return when there is nothing left to read (.exit flushes
	; the output and exits with status 0) or when the read fails
	; (.errexit prints rerr to stderr and exits with status 1).
	;
	; System calls use the FreeBSD convention: failure is reported in
	; the carry flag with a positive errno in EAX, so a sign test on EAX
	; would mistake the errno for a byte count. The carry flag is checked
	; immediately after the int 80h, before anything can change it.

	push	eax			; Caller's AL/AH (AH = flags)
	push	ecx			; Output byte count
	push	dword BUFSIZE
	mov	esi, ibuffer
	push	esi
	push	dword 0			; stdin = 0
	push	edx			; Filler dword; also saves EDX
	mov	eax, 3			; SYS_read
	int	80h
	jc	.errexit		; Carry set = the call failed

	pop	edx
	add	esp, byte 12
	pop	ecx
	mov	ebx, eax		; Bytes now available at ESI
	test	ebx, ebx
	pop	eax			; POP leaves the flags alone
	jz	.exit			; 0 bytes = end of input
	ret

align 4
.exit:
	; End of input. AH holds the flag bits again at this point.
	test	ah, PERC
	je	.l

	mov	al, '%'			; Input ended right after a `%'
	call	putchar

.l:
	test	ah, LFLAG
	je	.flush

	mov	al, 0Ah			; Option -l: append a new line
	call	putchar

.flush:
	call	write			; Flush output buffer

	sub	eax, eax		; Return success
	push	eax
	push	edx
	inc	al			; SYS_exit
	int	80h

.errexit:
	; Report the failure on stderr. The stack is not cleaned up
	; because we leave through SYS_exit.
	sub	eax, eax
	push	dword rerrlen
	mov	al, 2			; stderr
	push	dword rerr
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	sub	eax, eax
	inc	al			; Return failure, SYS_exit
	push	eax
	push	eax
	int	80h

;-----------------------------------------------------------------------------;
; _start - program entry point.
;
; Parses the options, then decodes either the remaining command line
; arguments or, if there are none, stdin.
;
; Register roles established here and relied on by decode, putchar, write,
; getchar and read:
;
;	AL  = current character
;	AH  = flag bits (PFLAG, PERC, LFLAG, EFLAG)
;	EBX = input byte counter used by getchar: all ones while the input
;	      comes from the command line, 0 to force a read from stdin
;	ESI = input pointer
;	EDI = output pointer into obuffer
;	ECX = number of bytes pending in obuffer
;	EDX = cleared here; pushed as the filler dword below the arguments
;	      of each int 80h
;
; This code never returns: every path ends in SYS_exit, either below or in
; read.exit, read.errexit or write.errexit.
;-----------------------------------------------------------------------------;
global	_start
_start:
	sub	eax, eax
	sub	ebx, ebx
	sub	ecx, ecx
	sub	edx, edx
	not	ebx
	; Skip the first two dwords of the initial stack (argc and argv[0]
	; in the standard startup layout); what follows is popped below as
	; a list of string pointers ended by a null pointer.
	add	esp, byte 8
	mov	edi, obuffer
	cld

	; Fetch the next argument while options are still being parsed.
.next:
	pop	esi
	or	esi, esi
	jne	.dash
	; No arguments left and no string was seen. With -e we are done
	; (read.exit flushes and exits); otherwise decode stdin.
	test	ah, EFLAG
	jne	read.exit

	not	ebx			; EBX = 0: first getchar calls read
	jmp	.loop

.dash:
	; After -e every argument is a string, even if it starts with `-'.
	test	ah, EFLAG
	jne	near .cmdinnerloop

	lodsb
	cmp	al, '-'
	jne	near .cmdfirst		; First string; AL holds its first char

	; A lone `-' is not a valid option.
	cmp	byte [esi], 0
	je	.err

	; Walk the option letters of this argument; the terminating 0
	; takes us to the next argument.
.cmd:
	lodsb
	or	al, al
	je	.next

	cmp	al, 'e'
	jne	.l

	or	ah, EFLAG
	jmp	short .cmd

.l:
	cmp	al, 'l'
	jne	.p

	or	ah, LFLAG
	jmp	short .cmd

.p:
	cmp	al, 'p'
	jne	.r

	or	ah, PFLAG
	jmp	short .cmd

.r:
	cmp	al, 'r'
	jne	.h

	; -r: print the version banner. AH is cleared so that EAX is
	; exactly the file descriptor once AL is loaded at .msg.
	sub	ah, ah
	push	dword rlen

	; Shared by -r and -h: the length is already on the stack. Writes
	; that many bytes starting at rmsg to stderr, then exits with
	; status 0.
.msg:
	mov	al, 2			; stderr
	push	dword rmsg
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	sub	eax, eax		; return success
	push	eax
	push	eax
	inc	al			; SYS_exit
	int	80h

.h:
	cmp	al, 'h'
	jne	.err

	; -h: same as -r but with the length of the full help text.
	sub	ah, ah
	push	dword hlen
	jmp	short .msg

	; Unknown option or lone `-': print the usage text to stderr and
	; exit with status -1.
.err:
	sub	ah, ah
	push	dword ulen
	mov	al, 2			; stderr
	push	dword umsg
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	mov	eax, -1			; return failure (-1)
	push	eax
	push	edx
	neg	eax			; SYS_exit
	int	80h

	; While this may seem like an endless loop, it is not.
	; The read procedure exits to the OS when there is no
	; input left to read.

align 4
.loop:
	call	getchar
	call	decode
	jmp	short .loop

	; Process input from the command line instead of stdin.
	;
	; .cmdloop      - advance to the next argument; a null pointer ends
	;                 the program through read.exit, otherwise a space
	;                 is written between two arguments
	; .cmdinnerloop - fetch the next character of the current argument
	; .cmdfirst     - entry used when AL already holds that character

align 4
.cmdloop:
	pop	esi
	or	esi, esi
	je	near read.exit

	mov	al, ' '
	call	putchar

.cmdinnerloop:
	lodsb

.cmdfirst:
	or	al, al
	je	.cmdloop		; End of this argument
	call	decode
	jmp	short .cmdinnerloop

;------ End of code -----------------------------------------------------------

