package Pista::Grupid;

use strict;
use warnings;

sub read_xml {
	my $filename = shift;

	open(FILE, "<$filename") or die "Cannot open $filename: $!\n";
	my $element = { };	# name, content, attrib

	my $text = join('', <FILE>);			# read the whole file
	$text =~ s/[\r\n]+/ /g;				# delete line breaks
	$text =~ s/<!--.*?-->//g;			# delete comments
	$text =~ s/<!DOCTYPE\s+[^>]+(\[.*?\])?\s*>//g;	# delete DTD
	$text =~ s/<\?.*?\?>//g;			# delete PI
	$text =~ s/>\s+/>/g;				# delete whitespace
	$text =~ s/\s+</</g;				# delete whitespace

	close FILE;

	my (@instack, @elementstack);
	while ($text) {
		if ($text =~ m|^(<(\w+)[^/>]*/>)(.*)$|) {	# EmptyElemTag
			$text = $3;
			push(@{$element->{content}}, _parse_tag($1));
		}
		elsif ($text =~ m|^(<(\w+)[^>]*>)(.*)$|) {	# STag
			$text = $3;
			push(@instack,$2);
			push(@elementstack, $element);
			$element = _parse_tag($1);
			$element->{content} = [ ];
		}
		elsif ($text =~ m|^([^<]+)(.*?)$|) {		# content
			$text = $2;
			push(@{$element->{content}}, $1);
		}
		elsif ($text =~ m|^(</(\w+)>)(.*)$|) {		# ETag
			$text = $3;
			my $tos = pop(@instack);
			if ($2 ne $tos) {
				die "XML parse error: no </$tos>\n";
			}
			$tos = pop(@elementstack);
			push(@{$tos->{content}}, $element);
			$element = $tos;
		}
		else {
			die "Cannot process text; $text\n";
		}
	}
	return $element->{content}->[0];
}

sub _parse_tag {
	my $tag = shift;
	my $element = { };

	return undef unless $tag =~ m|^<(\w+)(.*)>$|;
	$element->{name} = $1;
	$tag = $2;

	while ($tag =~ m|^\s+(\w+)\s*=\s*(['"])(.*?)\2(.*)$|) {
		$element->{attrib}->{$1} = $3;
		$tag = $4;
	}
	return $element;
}

# overrides CORE::dump()
sub dump_xml {
	my $element = shift;
	my $level = shift || 0;
	print ' ' x $level, "<$element->{name}";
	if (exists $element->{attrib}) {
		while (my ($key, $value) = each %{$element->{attrib}}) {
			print qq{ $key="$value"};
		}
	}
	if (exists $element->{content}) {
		print ">\n";
		$level++;
		for (@{$element->{content}}) {
			if (ref $_ eq 'HASH') {
				dump_xml($_, $level);
			}
			else {
				print ' ' x $level, "$_\n";
			}
		}
		$level--;
		print ' ' x $level, "</$element->{name}>\n";
	}
	else {
		print "/>\n";
	}
}

1;
