"""Example created in response to a question by Runsun Pan

This parses a simple line-oriented language where a section
in a bibliography-like system is denoted by sentinel values
which precede the section as the first characters of the
line.
"""

from simpleparse.parser import Parser
from simpleparse.common import chartypes # gets EOF

# SimpleParse EBNF grammar describing the bibliography format.
#
# "paper" is the production handed to Parser() further down: optional
# leading whitespace, one or more sections, then end of file.  Each section
# is a two-letter tag (AU, TI, SO or AB) followed by one or more lines that
# do not themselves start with a tag.
#
# Per the grammar's own comments, the >name< productions are "expanded"
# (their children are reported in place of the production itself) and the
# <name> productions are not reported at all.
#
# NOTE: this is NOT a raw string, so Python replaces each \n below with a
# real newline character before SimpleParse ever sees the grammar text.
# NOTE(review): the example evidently relies on SimpleParse accepting those
# embedded newlines inside "..." and [...] -- confirm before switching this
# literal to a raw string.
declaration = """

<ts> := whitespace*
<endoffile> := EOF

paper := ts, section+, !, endoffile

# a regular line is a line that is not
# started by a "sentinel" (our special headers)

section := (AU_section / TI_section / SO_section / AB_section)
# we expand the section-types since, at the moment
# we're not doing anything with their differences
>AU_section< := AU, ts, section_content
>TI_section< := TI, ts, section_content
>SO_section< := SO, ts, section_content
>AB_section< := AB, ts, section_content

# that could as easily have been written as:
# section := (AU/TI/SO/AB), section_content
# but we'll assume that people will want to
# know which type of section they've matched,
# and it's easy to imagine wanting AB sections
# to have a different internal structure than
# the others.

>section_content< := regular_line+
>regular_line< := ?-sentinel, line_content, "\n"?
line_content := -[\n]*

AU := "AU"
TI := "TI"
SO := "SO"
AB := "AB"

# we don't want to report the sentinel
# not being there for every line...
<sentinel> := (AU/TI/SO/AB)

"""

# Sample input for the grammar: two bibliography records separated by a
# blank line.  A section begins with one of the two-letter tags AU, TI, SO
# or AB at the start of a line (presumably author / title / source /
# abstract -- the grammar does not say); following lines that do not start
# with a tag continue that section.
#
# The driver code slices this exact string using the character offsets the
# parser reports, so the text that is parsed and the text that is sliced
# must be the same object.
testFile = """
AU Chen Jiqiu; Kuhlencordt Peter J; Astern Joshua; Gyurko Robert; Huang Paul
L [a].
TI Hypertension does not account for the accelerated atherosclerosis and
development of aneurysms in male apolipoprotein E/endothelial nitric oxide
synthase double knockout mice.
SO Circulation. [print] 104(20). November 13, 2001. 2391-2394.
AB Background: Apolipoprotein E (apoE)/endothelial nitric oxide synthase (eNOS)
double knockout (DKO) mice

AU White Thomas W [a]; Sellitto Caterina; Paul David L; Goodenough Daniel
A.
TI Prenatal lens development in connexin43 and connexin50 double knockout
mice.
SO Iovs. [print] 42(12). November, 2001. 2916-2923.
AB Purpose. To determine the roles of intercellular communication in embryonic
eye growth and development, mice with a targeted deletion of the Cx43 gene were
examined
"""

# Parse the sample text starting from the "paper" production.  parse()
# hands back a (success, children, next_position) tuple; the children of
# "paper" are the matched sections, each a (name, start, stop, children)
# tuple whose start/stop are character offsets into the parsed text.
success, paper, next_position = Parser(declaration, 'paper').parse(testFile)

if not success:
    # Previously a failed match was silently ignored (the loop below simply
    # had nothing to iterate over); say so explicitly instead.
    print("testFile did not match the 'paper' production")

for section in paper:
    children = section[3]
    # The first child is the tag production (AU/TI/SO/AB); the remaining
    # children are the line_content matches belonging to that section.
    header = children[0]
    content_lines = children[1:]
    # Single-argument print(...) produces identical output whether it is
    # run as a Python 2 print statement or a Python 3 print() call.
    print("____ %s ____" % (header[0],))
    for line in content_lines:
        print(testFile[line[1]:line[2]])
