;---- urlencode.asm ----------------------------------------------------------;
;
; URL encode a sequence of octets (RFC 1738).
;
; The sequence comes from the command line. If no sequence is specified,
; it is taken from stdin. The output is sent to stdout.
;
; The characters a-zA-Z0-9 are not encoded. Additionally, any ASCII 
; non-control 7-bit characters, except the percent sign (%), that appear
; on the command line are not encoded.
;
; Everything else is encoded into %XX where XX is the hexadecimal value
; of the octet, always two bytes long.
;
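; Valid options are:
;
;	-a - encode alphanumerics (same as -dt)
;	-d - encode [0-9]
;	-e - end of options (the remaining arguments are encoded)
;	-h - help
;	-n - do not encode new lines
;	-p - encode spaces as +
;	-l - append a new line at the end
;	-r - version info
;	-t - encode [A-Za-z]
;	-x - exclude all (no encoding)
;	-% - encode % (only needed with -x)
;	-[list] - exclude the listed characters from encoding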
;
; This code, when assembled and linked, will work under FreeBSD, and perhaps
; other BSD systems. It requires NASM for assembly:
;
;	nasm -f elf urlencode.asm
;	ld -o urlencode urlencode.o
;	strip urlencode
;
; Started:	23 Oct 2000
; Updated:	24 Oct 2000
;
; Version 1.0
;
; Copyright (c) 2000 G. Adam Stanislav
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; 1. Redistributions of source code must retain the above copyright
;    notice, this list of conditions and the following disclaimer.
; 2. Redistributions in binary form must reproduce the above copyright
;    notice, this list of conditions and the following disclaimer in the
;    documentation and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
; OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
; LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
; OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
; SUCH DAMAGE.
;
;-----------------------------------------------------------------------------;

; Size, in bytes, of the input buffer and of the output buffer.
%define	BUFSIZE	2048

; Option bits.  The program keeps the current set of options in AH.
; -n
%define	NFLAG	(1 << 0)
; -p
%define	PFLAG	(1 << 1)
; -l
%define	LFLAG	(1 << 2)
; -%
%define	PERC	(1 << 3)
; -t
%define	TFLAG	(1 << 4)
; -d
%define	DFLAG	(1 << 5)
; -x
%define	XFLAG	(1 << 6)
; -e
%define	EFLAG	(1 << 7)
; -a is simply -d and -t together
%define	AFLAG	(TFLAG | DFLAG)

section	.bss

; Data read from stdin; filled by read, drained by getchar.
ibuffer:	resb	BUFSIZE

; Encoded output waiting to be written; filled by putchar, drained by write.
obuffer:	resb	BUFSIZE

section	.data
; Exclusion table, indexed by octet value.  The -[list] parser stores -1 in
; the entry of every listed octet; encode leaves an octet alone when its
; entry is non-zero.
etable	times 256	db	0
; Digit lookup used by encode to turn a 4-bit value into a hex character.
hex	db	'0123456789ABCDEF'
; Message written to stderr when reading stdin fails.
rerr	db	'URLENCODE: Read error.', 0Ah
rerrlen	equ	$-rerr
align 4
; Message written to stderr when writing stdout fails.
werr	db	'URLENCODE: Write error.', 0Ah
werrlen	equ	$-werr
align 4
; The version, usage and help texts are laid out back to back on purpose:
;   rmsg .. rlen	version text only		(-r)
;   umsg .. ulen	usage summary only		(unknown option)
;   rmsg .. hlen	version + usage + notes		(-h)
; Do not reorder or insert data between them.
rmsg	db	'URLENCODE version 1.0 (24 Oct 2000)', 0Ah
	db	'Copyright 2000 G. Adam Stanislav', 0Ah
	db	'All rights reserved.', 0Ah
rlen	equ	$-rmsg
	; Blank separator line; part of the -h output only.
	db	0Ah
umsg	db	'Usage: urlencode [options] [string ...]', 0Ah, 0Ah
	db	09h, '-% = encode % (only needed with -x)', 0Ah
	db	09h, '-a = encode alphanumeric characters (same as -dt)', 0Ah
	db	09h, '-d = encode digits (0-9)', 0Ah
	db	09h, '-e = encode rest of arguments; exit if [string ...] is missing', 0Ah
	db	09h, '-h = help', 0Ah
	db	09h, '-l = append a new line', 0Ah
	db	09h, '-n = do not encode new lines', 0Ah
	db	09h, "-p = encode spaces as `+' (ignored if excluded)", 0Ah
	db	09h, '-r = print version information', 0Ah
	db	09h, '-t = encode alphabetic characters (A-Za-z)', 0Ah
	db	09h, '-x = exclude all (no encoding)', 0Ah
	db	09h, '-[list] = exclude listed characters', 0Ah, 0Ah
ulen	equ	$-umsg
	; Extra notes; part of the -h output only.
	db	'The exclusion list may be URL-encoded. It may contain ranges. Its members', 0Ah
	db	"may be escaped with `\' (e.g., `-[\-\%\]\\]' excludes `-', `%', `]', and `\'", 0Ah
	db	"from encoding, `-[%23-%26]' excludes `#', `$', `%', and `&').", 0Ah, 0Ah
	db	'If no [string ...] is specified, input comes from stdin.', 0Ah
hlen	equ	$-rmsg

; Code goes in .text: NASM's ELF output only marks well-known section names
; as executable, and an unrecognised name such as .code defaults to a
; non-executable section, which faults on systems that enforce no-execute.
section	.text

;-----------------------------------------------------------------------------
; encode - emit one octet, either verbatim or URL-escaped
;
; In:	AL	= octet to process
;	AH	= option flags (NFLAG, PFLAG, ... as set by the option parser)
;	EDX	= bits 8-31 must be zero; DL is overwritten with the octet so
;		  that EDX can index etable and hex
;	EDI/ECX	= output buffer state consumed by putchar
; Out:	the octet itself, `+', or a three-character %XX escape is appended
;	to the output.  AL and DL are clobbered; AH is preserved.
;
; The routine ends by falling through into putchar.
;-----------------------------------------------------------------------------
encode:
	mov	dl, al			; EDX = octet, usable as a table index

	test	ah, XFLAG
	je	.notx

	; -x: nothing is encoded unless -%, -d or -t asks for it.
	cmp	al, '%'
	jne	.xa

	test	ah, PERC
	jne	near .go		; -x -%: always escape the percent sign

.xa:
	test	ah, AFLAG
	je	near putchar		; neither -d nor -t: pass through

	test	ah, DFLAG
	je	.xt			; only -t: go straight to the letter test

	; Octets are unsigned, so use unsigned comparisons throughout.
	cmp	al, '0'
	jb	near putchar
	cmp	al, '9'
	jbe	.cmd

	; Not a digit.  Letters are candidates only when -t was given too;
	; without this test -x -d would also encode letters.
	test	ah, TFLAG
	je	near putchar

.xt:
	cmp	al, 'z'
	ja	near putchar

	cmp	al, 'a'
	jae	.cmd

	cmp	al, 'A'
	jb	near putchar

	cmp	al, 'Z'
	ja	near putchar
	jmp	short .cmd

.notx:
	; Default mode: everything is encoded except digits and letters
	; (unless -d / -t) and, with -n, the new line.
	cmp	al, 0Ah
	jne	.test

	test	ah, NFLAG
	jne	putchar

.test:
	test	ah, DFLAG
	jne	.alpha			; -d: digits get no free pass

	cmp	al, '0'
	jb	.cmd
	cmp	al, '9'
	jbe	putchar

.alpha:
	test	ah, TFLAG
	jne	.cmd			; -t: letters get no free pass

	cmp	al, 'A'
	jb	.cmd
	cmp	al, 'Z'
	jbe	putchar

	cmp	al, 'a'
	jb	.cmd
	cmp	al, 'z'
	jbe	putchar

.cmd:
	; The octet is a candidate for encoding; it is still passed through
	; verbatim when the -[list] option marked it in etable.
	cmp	byte [etable+edx], 0
	jne	putchar

.encode:
	cmp	dl, ' '
	jne	.go
	test	ah, PFLAG
	je	.go

	mov	al, '+'			; -p: a space becomes `+'
	jmp	short putchar

.go:
	; Emit `%' followed by the two hex digits of the octet in DL.
	mov	al, '%'
	call	putchar

	push	edx			; keep the octet while DL holds a nibble
	shr	dl, 4
	mov	al, [hex+edx]		; high nibble
	pop	edx
	call	putchar

	and	dl, 0Fh
	mov	al, [hex+edx]		; low nibble

	; Fall through

align 4
; putchar - append AL to the output buffer.
; EDI points at the next free byte and ECX counts the bytes stored so far.
; Returns through write.done while there is room left; once BUFSIZE bytes
; have accumulated, execution continues into write, which flushes them.
putchar:
	mov	[edi], al		; store the character ...
	inc	edi			; ... and advance the write pointer
	inc	ecx
	cmp	ecx, BUFSIZE
	jb	write.done

	; Buffer is full: fall through to write

;-----------------------------------------------------------------------------
; write - flush the output buffer to stdout
;
; In:	EDI = one past the last buffered byte, ECX = number of buffered bytes
; Out:	EDI = start of the buffer, ECX = 0; EAX and EDX are preserved.
;	Does nothing when ECX is already zero.
; On a write failure an error message goes to stderr and the program exits
; with status 2.
;-----------------------------------------------------------------------------
write:
	; Send the contents of the output buffer to stdout.
	; The buffer starts at EDI - ECX and is ECX bytes long.

	jecxz	.done			; Empty buffer, do nothing

	push	eax			; Preserve the caller's AL and flags in AH
	sub	edi, ecx		; Find start of buffer
	sub	eax, eax
	push	ecx			; Number of bytes
	inc	al			; stdout = 1
	push	edi			; Buffer address
	push	eax			; File descriptor
	push	edx			; Return-address slot; popped back into EDX
	mov	al, 4			; SYS_write
	int	80h

	; FreeBSD reports a failed system call by setting the carry flag and
	; returning a positive errno in EAX, so the sign of EAX alone does not
	; reveal an error.  Test CF before any instruction disturbs it.  The
	; stack is left unbalanced on this path; .errexit never returns.
	jc	.errexit

	pop	edx
	sub	ecx, ecx		; Buffer is now empty
	add	esp, byte 12
	or	eax, eax
	js	.errexit		; Also treat a negative result as failure
	pop	eax

.done:
	ret

align 4
.errexit:
	; Report the failure on stderr, then exit with status 2.
	sub	eax, eax
	push	dword werrlen
	mov	al, 2			; stderr
	push	dword werr
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	sub	eax, eax
	mov	al, 2			; return failure
	push	eax
	push	edx
	dec	al			; SYS_exit
	int	80h

align 4
; getchar - return the next input octet in AL.
; EBX holds the number of unread bytes at ESI; when it is zero the input
; buffer is refilled through read first (read does not return at end of
; input).  ESI is advanced and EBX decremented.
getchar:
	test	ebx, ebx
	jnz	.take
	call	read			; refill: sets ESI and EBX

.take:
	mov	al, [esi]
	inc	esi
	dec	ebx
	ret

align 4
;-----------------------------------------------------------------------------
; read - refill the input buffer from stdin
;
; Out:	ESI = ibuffer, EBX = number of bytes read (always > 0 on return);
;	EAX, ECX and EDX are preserved.
; At end of input the routine does not return: it continues at .exit, which
; appends a new line if -l was given, flushes the output and exits with
; status 0.  On a read failure a message goes to stderr and the program
; exits with status 1.
;
; .exit is also entered directly from _start; it expects the option flags
; in AH and the output buffer state in EDI/ECX.
;-----------------------------------------------------------------------------
read:
	push	eax
	push	ecx
	push	dword BUFSIZE		; Number of bytes wanted
	mov	esi, ibuffer
	sub	eax, eax		; stdin = 0
	push	esi			; Buffer address
	push	eax			; File descriptor
	push	edx			; Return-address slot; popped back into EDX
	mov	al, 3			; SYS_read
	int	80h

	; FreeBSD reports a failed system call by setting the carry flag and
	; returning a positive errno in EAX; without this test an error would
	; be mistaken for "errno bytes read".  The stack is left unbalanced
	; on this path; .errexit never returns.
	jc	.errexit

	pop	edx
	add	esp, byte 12
	pop	ecx
	mov	ebx, eax		; EBX = byte count for getchar
	pop	eax
	test	ebx, ebx
	je	.exit			; Zero bytes: end of input
	js	.errexit		; Also treat a negative result as failure
	ret

align 4
.exit:
	test	ah, LFLAG
	je	.flush

	mov	al, 0Ah			; -l: finish the output with a new line
	call	putchar

.flush:
	call	write			; Flush output buffer

	sub	eax, eax		; Return success
	push	eax
	push	edx
	inc	al			; SYS_exit
	int	80h

.errexit:
	; Report the failure on stderr, then exit with status 1.
	sub	eax, eax
	push	dword rerrlen
	mov	al, 2			; stderr
	push	dword rerr
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	sub	eax, eax
	inc	al			; Return failure, SYS_exit
	push	eax
	push	eax
	int	80h

global	_start
;-----------------------------------------------------------------------------
; _start - program entry: parse the options, then encode either the
; remaining arguments or stdin.
;
; Register roles used from here on:
;	AH  = option flags		AL  = current character
;	EBX = unread bytes in ibuffer	ESI = read pointer
;	ECX = bytes held in obuffer	EDI = write pointer into obuffer
;	EDX = scratch / table index	EBP = address of the current argv slot
;-----------------------------------------------------------------------------
_start:
	xor	eax, eax
	xor	ebx, ebx
	xor	ecx, ecx
	xor	edx, edx
	lea	ebp, [esp+8]		; skip argc and argv[0]
	mov	edi, obuffer
	cld

.next:
	mov	esi, [ebp]		; ESI = current argument

	test	esi, esi
	jz	near .preloop		; NULL: no arguments left

	test	ah, EFLAG
	jnz	near .cmdinnerloop	; after -e every argument is data
	lodsb
	cmp	al, '-'
	jne	near .cmdfirst		; not an option: start encoding here

	cmp	byte [esi], 0
	je	near .loop		; a lone "-": take input from stdin

	add	ebp, byte 4		; option argument consumed

.cmd:
	; Walk the option letters of the current argument.
	lodsb
	test	al, al
	jz	.next

	cmp	al, '['
	je	near .parse

.perc:
	cmp	al, '%'
	jne	.t

	or	ah, PERC
	jmp	short .cmd

.t:
	cmp	al, 't'
	jne	.d

	or	ah, TFLAG
	jmp	short .cmd

.d:
	cmp	al, 'd'
	jne	.e

	or	ah, DFLAG
	jmp	short .cmd

.e:
	cmp	al, 'e'
	jne	.x

	or	ah, EFLAG
	jmp	short .cmd

.x:
	cmp	al, 'x'
	jne	.a

	or	ah, XFLAG
	jmp	short .cmd

.a:
	cmp	al, 'a'
	jne	.p

	or	ah, AFLAG
	jmp	short .cmd

.p:
	cmp	al, 'p'
	jne	.n

	or	ah, PFLAG
	jmp	short .cmd

.n:
	cmp	al, 'n'
	jne	.l

	or	ah, NFLAG
	jmp	short .cmd

.l:
	cmp	al, 'l'
	jne	.r

	or	ah, LFLAG
	jmp	short .cmd

.r:
	cmp	al, 'r'
	jne	.h

	xor	ah, ah			; EAX must hold nothing but the fd below
	push	dword rlen		; version text only

.msg:
	; Print rmsg (length already pushed) on stderr and exit with status 0.
	mov	al, 2			; stderr
	push	dword rmsg
	push	eax
	push	edx
	add	al, al			; 2 + 2 = 4 = SYS_write
	int	80h

	xor	eax, eax		; exit status 0
	push	eax
	push	eax
	inc	al			; SYS_exit
	int	80h

.h:
	cmp	al, 'h'
	jne	.err

	xor	ah, ah
	push	dword hlen		; version + usage + notes
	jmp	short .msg

.err:
	; Unknown option or malformed -[list]: print the usage summary on
	; stderr and exit with status -1.
	xor	ah, ah
	push	dword ulen
	mov	al, 2			; stderr
	push	dword umsg
	push	eax
	push	edx
	add	al, al			; 2 + 2 = 4 = SYS_write
	int	80h

	mov	eax, -1			; exit status -1
	push	eax
	push	edx
	neg	eax			; -(-1) = 1 = SYS_exit
	int	80h

	; The stdin loop below has no exit of its own, yet it is not
	; endless: read leaves the program once the input is exhausted.

.preloop:
	; No string arguments were found.  With -e that means there is
	; nothing to encode, so finish through read.exit.
	test	ah, EFLAG
	jnz	near read.exit

.loop:
	call	getchar
	cmp	al, 0Ah
	jne	.plain

	; A new line: encode it, then flush the output right away.
	call	encode
	call	write
	jmp	short .loop

.plain:
	call	encode
	jmp	short .loop

	; Parse the exclude list of a -[list] option.
	; ESI points just past the `['.  Every listed octet, and every octet
	; of a `first-last' range, gets its etable entry set to -1.  A `]'
	; ends the list and resumes option parsing at .cmd; a string that
	; ends before the `]' is a usage error.
.parse:
	lodsb

.parsechar:
	or	al, al
	je	.err			; argument ended inside the list

	cmp	al, ']'
	je	near .cmd		; end of list

	call	.parseperc		; resolve a `\c' or `%XX' form into AL

.ch1:
	; We have a character to exclude. It is either just a character,
	; or the first of a range.
	movzx	edx, al
	mov	byte [etable+edx], -1

	lodsb
	cmp	al, '-'
	jne	.parsechar		; not a range: AL is the next list item

	; A range: fetch its other end.
	lodsb
	or	al, al
	je	.err

	cmp	al, ']'
	je	.err			; `-' directly before `]' is an error

	call	.parseperc

	; DL = first end (already stored), AL = second end.
	cmp	dl, al
	je	.parse			; both ends equal: nothing more to store
	ja	.down			; range was given high-to-low
	movzx	ebx, al

.store:
	; Mark AL, AL-1, ... down to DL+1.
	mov	byte [etable+ebx], -1
	dec	bl
	cmp	bl, dl
	ja	.store

.stored:
	; EBX was zero-extended above, so this leaves all of EBX zero again
	; (getchar treats EBX as the count of unread input bytes).
	sub	bl, bl

	jmp	short .parse

.down:
	movzx	ebx, al

.dstore:
	; Mark AL, AL+1, ... up to DL-1.
	mov	byte [etable+ebx], -1
	inc	bl
	cmp	bl, dl
	jb	.dstore
	jmp	short .stored

;-----------------------------------------------------------------------------
; .parseperc - resolve one member of the -[list] exclusion list
;
; In:	AL  = character just read from the list, ESI = next character
; Out:	AL  = the octet it stands for:
;		`\c'   yields c (any non-NUL character)
;		`%XX'  yields the octet with hexadecimal value XX
;		anything else is returned unchanged
;	ESI is advanced past whatever was consumed.  DH is used as scratch
;	and is zero again on return from the `%XX' path; DL is preserved.
; A NUL after `\' or a non-hex digit after `%' jumps to .err, which prints
; the usage text and exits.
;-----------------------------------------------------------------------------
.parseperc:
	cmp	al, '\'
	jne	.tryperc

	lodsb				; take the escaped character literally
	or	al, al
	je	near .err

.gotit:
	ret

.tryperc:
	cmp	al, '%'
	jne	.gotit

	; First hex digit.  Octets are unsigned, so compare unsigned.
	lodsb
	cmp	al, '0'
	jb	near .err

	cmp	al, '9'
	ja	.ul1

	sub	al, '0'
	jmp	short .hex2

.ul1:
	cmp	al, 'A'
	jb	near .err

	cmp	al, 'F'
	ja	.ll1

	add	al, 10 - 'A'
	jmp	short .hex2

.ll1:
	cmp	al, 'a'
	jb	near .err

	cmp	al, 'f'
	ja	near .err

	add	al, 10 - 'a'

.hex2:
	mov	dh, al			; keep the high nibble

	; Second hex digit.
	lodsb
	cmp	al, '0'
	jb	near .err

	cmp	al, '9'
	ja	.ul2

	sub	al, '0'
	jmp	short .hexit

.ul2:
	cmp	al, 'A'
	jb	near .err

	cmp	al, 'F'
	ja	.ll2

	add	al, 10 - 'A'
	jmp	short .hexit

.ll2:
	cmp	al, 'a'
	jb	near .err

	cmp	al, 'f'
	ja	near .err

	; Convert `a'-`f' to 10-15, exactly as for the first digit.  Without
	; this the raw ASCII code was merged into the result (`%2f' did not
	; yield 2Fh).
	add	al, 10 - 'a'

.hexit:
	shl	dh, 4
	or	al, dh			; AL = high nibble : low nibble
	sub	dh, dh			; EDX must again hold only DL
	ret

; Encode the string arguments.  .cmdbigloop moves on to the next argument
; and emits the separating space; .cmdinnerloop walks the characters of the
; current one.  Other parts of _start enter at .cmdinnerloop (ESI at the
; start of an argument) and at .cmdfirst (first character already in AL).
.cmdbigloop:
	add	ebp, byte 4		; next argv slot
	mov	esi, [ebp]
	test	esi, esi
	jz	near read.exit		; no more arguments: finish up

	mov	al, ' '			; arguments are joined by one space,
	call	encode			; which is encoded like any other octet

.cmdinnerloop:
	lodsb

.cmdfirst:
	test	al, al
	jz	.cmdbigloop		; end of this argument
	call	encode
	jmp	short .cmdinnerloop

;------ End of code -----------------------------------------------------------

