// tag.cpp
// Implementation file.

// HTMLTagAttribute

#include "tag.h"
#include "error.h"
#include "misc.h"

// Characters the scanning helpers in this file treat as whitespace.
const char *asciiWhiteSpace = " \n\r\t\b\f\v\a";
// Working copy of the whitespace set; filled in on demand by initWhiteSpace().
string whiteSpace;

// Fill whiteSpace from asciiWhiteSpace the first time it is needed.
// A whiteSpace that already holds characters is left untouched.
void initWhiteSpace() {
	if (!whiteSpace.empty()) {
		return;
	}
	whiteSpace = asciiWhiteSpace;
}

const string HTMLTagError::message() {
	string errString("HTML Tag error: ");
	switch (error) {
	case eNoSuchAttribute:
		errString += "No such attribute";
	}
	return errString;
}

// Default constructor: does no work of its own.
HTMLTagAttribute::HTMLTagAttribute() {}

// Copy constructor: delegates to the assignment operator so both
// copy paths stay in sync.
HTMLTagAttribute::HTMLTagAttribute(const HTMLTagAttribute &attrib) {
	operator=(attrib);
}

// Construct an attribute that has a name but no value.
// The name is stored as returned by upperCase().
HTMLTagAttribute::HTMLTagAttribute(const string &inName) {
	this->name = upperCase(inName);
}

// Construct an attribute from a name and a raw value.
// The name is stored as returned by upperCase(); the value is copied and
// then normalized in place by clean() (quote and backslash handling).
HTMLTagAttribute::HTMLTagAttribute(const string &inName, const string &inValue) {
	this->value = inValue;
	clean(this->value);
	this->name = upperCase(inName);
}

// Destructor: nothing to release explicitly.
HTMLTagAttribute::~HTMLTagAttribute() {}

// Normalize a raw attribute value in place.
//
// - If the value is wrapped in a matching pair of quotes (either "..." or
//   '...', the two forms skipValue() accepts), the surrounding quotes are
//   removed.
// - A backslash acts as an escape character: the backslash itself is dropped
//   and the character following it is kept literally, so \" yields " and
//   \\ yields a single backslash.  A trailing backslash with nothing after
//   it is dropped.
//
// inString: the value to normalize; overwritten with the result.
void HTMLTagAttribute::clean(string &inString) {
	const size_t length = inString.length();
	size_t first = 0;
	size_t last = length; // one past the last character to keep
	// Compare the length against 2 instead of computing length()-1: the
	// length is unsigned, so subtracting from 0 would wrap around.
	if (length >= 2) {
		const char quote = inString[0];
		if ((quote == '"' || quote == '\'') && inString[length-1] == quote) {
			first = 1;
			last = length - 1;
		}
	}
	string tmpString;
	tmpString.reserve(last - first);
	// True when the previous character was a backslash that has not yet
	// been paired with the character it escapes.
	bool escaped = false;
	for (size_t i = first; i < last; i++) {
		const char ch = inString[i];
		if (!escaped && ch == '\\') {
			escaped = true;
			continue;
		}
		escaped = false;
		tmpString += ch;
	}
	inString = tmpString;
}

// Two attributes are equal when their names match ignoring case.
// Values are not part of the comparison.
bool operator==(const HTMLTagAttribute &a, const HTMLTagAttribute &b) {
	const bool sameName = no_case_compare(a.name, b.name);
	return sameName;
}

// Order attributes by name using plain (case-sensitive) string ordering.
// Values are not part of the comparison.
bool operator<(const HTMLTagAttribute &a, const HTMLTagAttribute &b) {
	return a.name.compare(b.name) < 0;
}

// Copy the name and value of attrib into this attribute.
// Returns *this so assignments can be chained.
HTMLTagAttribute &HTMLTagAttribute::operator=(const HTMLTagAttribute &attrib) {
	if (this != &attrib) {
		value = attrib.value;
		name = attrib.name;
	}
	return *this;
}


// HTMLTag

// Default constructor: does no work of its own.
HTMLTag::HTMLTag() {}

// Parse one tag out of the range [begin, end).
//
// begin is expected to point at the '<' that opens the tag.  The tag name is
// every character after that '<' up to the first whitespace character or '>'.
// Attributes are then parsed one at a time with ParseAttribute() until the
// closing '>' or end is reached.  An attribute is any run of characters other
// than whitespace, '=' or '>'; if an '=' follows, the attribute has a value.
//
// On return textEnd holds end, and tagEnd holds the position of the closing
// '>' (or end when the tag is never terminated).
HTMLTag::HTMLTag(const string::iterator &begin, const string::iterator &end) {
	textEnd = end;
	tagEnd = end;
	if (begin == end) {
		// Nothing to parse; begin+1 below would step outside the range.
		return;
	}
	// First, find out what tag this is.
	initWhiteSpace();
	string wsGT(whiteSpace);
	wsGT += ">";
	string::iterator curPos = skipNotSet(begin, end, wsGT);
	if (curPos != begin) {
		tag = string(begin+1, curPos); // have to skip the first '<' char.
	}
	// Now walk the attributes.  The end test comes first so the iterator is
	// never dereferenced once the input is exhausted.
	string::iterator next = skipWhiteSpace(curPos, end);
	while (next != end && *next != '>') {
		const string::iterator after = ParseAttribute(next, end);
		if (after == next) {
			// Defensive: stop rather than spin if no input was consumed.
			break;
		}
		next = after;
	}
	// Either at the end of the text, or at the '>' that closes the tag.
	tagEnd = next;
}

const string::iterator HTMLTag::ParseAttribute(const string::iterator &start, const string::iterator &end){
	// Strategy:
	//
	// Find attribute, and if it has a value, find it, too.  Create it,
	// then return the position after the attribute or value.
	string val;
	string attr;
	string wsEqualGT(whiteSpace);
	wsEqualGT += '=';
	wsEqualGT += '>';
	string::iterator myStart = start;
	myStart = skipWhiteSpace(myStart, end);
	string::iterator attrEnd = skipNotSet(myStart, end, wsEqualGT);
	attr = string(myStart, attrEnd);
	string::iterator myEnd = skipWhiteSpace(attrEnd, end);
	if (*myEnd == '=') {
		// We have a value with this attribute.
		string::iterator valStart = skipWhiteSpace(myEnd+1, end);
		string::iterator valEnd = skipValue(valStart, end);
		val = string(valStart, valEnd);
		myEnd = skipWhiteSpace(valEnd, end);
		attributes.push_back(HTMLTagAttribute(attr, val));
	} else {
		attributes.push_back(HTMLTagAttribute(attr));
	}
	return myEnd;
}

// Copy constructor: delegates to the assignment operator.
HTMLTag::HTMLTag(const HTMLTag &origTag) {
	operator=(origTag);
}

// Destructor: nothing to release explicitly.
HTMLTag::~HTMLTag() {}

// Look up an attribute of this tag by name.
//
// The lookup goes through HTMLTagAttribute's operator==, which compares
// names ignoring case.  Returns a reference to the first matching
// attribute stored in this tag.
//
// Throws HTMLTagError(eNoSuchAttribute) when the requested name is empty
// or no attribute with that name exists.
HTMLTagAttribute &HTMLTag::GetAttribute(const string &attribute) {
	if (attribute.empty()) {
		throw HTMLTagError(HTMLTagError::eNoSuchAttribute);
	}
	HTMLTagAttribute wanted(attribute);
	vector<HTMLTagAttribute>::iterator match = find(attributes.begin(), attributes.end(), wanted);
	if (match != attributes.end()) {
		return *match;
	}
	throw HTMLTagError(HTMLTagError::eNoSuchAttribute);
}

// Two tags are equal when their tag names are identical (exact,
// case-sensitive string comparison).  Attributes are not compared.
const bool HTMLTag::operator==(const HTMLTag &origTag) {
	const bool sameName = (origTag.tag == tag);
	return sameName;
}

// Copy the tag name, attributes and text positions of origTag.
//
// textEnd and tagEnd are copied along with the name and attributes:
// HTMLTagContainer's constructor assigns a tag and then immediately calls
// GetTextEnd() on the copy, so the copy must carry the same positions as
// the original.  The iterators refer to the text the original tag was
// parsed from; they are not owned by the tag and stay usable only as long
// as that text does.
//
// Returns *this so assignments can be chained.
HTMLTag &HTMLTag::operator=(const HTMLTag &origTag) {
	tag = origTag.tag;
	attributes = origTag.attributes;
	textEnd = origTag.textEnd;
	tagEnd = origTag.tagEnd;
	return *this;
}

const string::iterator &HTMLTag::GetTextEnd() {
	return textEnd;
}

// HTMLTagContainer

// Default constructor: does no work of its own.
HTMLTagContainer::HTMLTagContainer() {}

// Build a container from an already-parsed opening tag: remember the tag,
// locate its closing tag, and capture the text in between as the contents.
HTMLTagContainer::HTMLTagContainer(const HTMLTag &newTag) {
	// Strategy:
	// Collect all text from the end of 'newTag' to the closing tag.
	tag = newTag;
	// Search for the closing tag of tag.GetTag(), scanning from begin() up
	// to the text end recorded in the tag.
	// NOTE(review): begin() and end() are declared outside this file;
	// presumably begin() is the position just after the opening tag and
	// end() depends on tagEnd -- confirm against tag.h.  The statement
	// order below matters if so.
	tagEnd = skipEndTag(begin(), tag.GetTextEnd(), tag.GetTag());
	contents = string(begin(), end());
}

// Copy constructor: delegates to the assignment operator.
HTMLTagContainer::HTMLTagContainer(const HTMLTagContainer &origTag) {
	operator=(origTag);
}

// Destructor: nothing to release explicitly.
HTMLTagContainer::~HTMLTagContainer() {}

// Two containers are equal when their tags compare equal and their
// contents are identical.  The contents are only compared when the tags
// already match.
const bool HTMLTagContainer::operator==(const HTMLTagContainer &origTag) {
	if (!(tag == origTag.tag)) {
		return false;
	}
	return contents == origTag.contents;
}

// Copy the tag, contents and closing-tag position of origTag.
// Returns *this so assignments can be chained.
HTMLTagContainer &HTMLTagContainer::operator=(const HTMLTagContainer &origTag) {
	this->tag = origTag.tag;
	this->contents = origTag.contents;
	this->tagEnd = origTag.tagEnd;
	return *this;
}

// Skip over a leading run of characters that all belong to charSet.
//
// start, end: the range [start, end) to scan.
// charSet:    the characters to skip.
//
// Returns the position of the first character that is NOT in charSet, or
// end when every character in the range belongs to the set (including the
// case of an empty range).
const string::iterator skipSet(
	const string::iterator &start,
	const string::iterator &end,
	const string &charSet
) {
	string::iterator myStart = start;
	// Advance while the current character is a member of the set.
	while (myStart != end && charSet.find(*myStart) != string::npos) {
		++myStart;
	}
	// Every path returns a value; running off the end of the range yields end.
	return myStart;
}

// Skip over a leading run of characters that do NOT belong to charSet.
//
// start, end: the range [start, end) to scan.
// charSet:    the characters that stop the scan.
//
// Returns the position of the first character that IS in charSet, or end
// when no character in the range belongs to the set (including the case of
// an empty range).
const string::iterator skipNotSet(
	const string::iterator &start,
	const string::iterator &end,
	const string &charSet
) {
	string::iterator myStart = start;
	// Advance while the current character is outside the set.
	while (myStart != end && charSet.find(*myStart) == string::npos) {
		++myStart;
	}
	// Every path returns a value; running off the end of the range yields end.
	return myStart;
}

// Skip leading whitespace in [start, end).
// Returns whatever skipSet() reports for the shared whiteSpace set.
const string::iterator skipWhiteSpace (
	const string::iterator &start,
	const string::iterator &end
) {
	// Make sure the shared whitespace set is populated before scanning with it.
	initWhiteSpace();
	const string::iterator firstNonSpace = skipSet(start, end, whiteSpace);
	return firstNonSpace;
}

// Skip leading non-whitespace characters in [start, end).
// Returns whatever skipNotSet() reports for the shared whiteSpace set.
const string::iterator skipNonWhiteSpace (
	const string::iterator &start,
	const string::iterator &end
) {
	// Make sure the shared whitespace set is populated before scanning with it.
	initWhiteSpace();
	const string::iterator firstSpace = skipNotSet(start, end, whiteSpace);
	return firstSpace;
}

// Find the end of an attribute value that begins at start.
//
// Leading whitespace is skipped first.  If the value then starts with a
// quote character (' or "), it extends through the matching closing quote;
// inside the quotes a backslash escapes the following character so that the
// quote character itself can appear in the value.  Otherwise the value
// extends up to the next whitespace character or '>'.
//
// Returns the position one past the value (one past the closing quote for
// a quoted value), or end if the input runs out first.  The iterator is
// never dereferenced or advanced once it equals end.
const string::iterator skipValue(
	const string::iterator &start,
	const string::iterator &end
) {
	// skipWhiteSpace() also guarantees whiteSpace is populated for use below.
	string::iterator myStart = skipWhiteSpace(start, end);
	if (myStart == end) {
		return myStart;
	}
	if ((*myStart == '\'') || (*myStart == '\"')) {
		const char quoteChar = *myStart;
		++myStart; // step past the opening quote
		while (myStart != end) {
			const char ch = *myStart;
			++myStart;
			if (ch == quoteChar) {
				break; // closing quote consumed
			}
			if (ch == '\\' && myStart != end) {
				++myStart; // skip the escaped character as well
			}
		}
		return myStart;
	}
	string wsGT = whiteSpace;
	wsGT += '>';
	return skipNotSet(myStart, end, wsGT);
}

// Search [start, end) for the closing tag that matches tag.
//
// A closing tag is a '<', optional whitespace, a '/', and then a name that
// runs up to the next whitespace character or '>'.  The name is compared
// with tag ignoring case.
//
// Returns the position of the '<' that opens the matching closing tag, or
// end when no matching closing tag is found.  Nested tags with the same
// name are not taken into account: the first matching closing tag wins.
const string::iterator skipEndTag(
	const string::iterator &start,
	const string::iterator &end,
	const string &tag
) {
	initWhiteSpace();
	string eot = whiteSpace;
	eot += '>';
	string::iterator myStart = start;
	while (myStart != end) {
		if (*myStart != '<') {
			// Ordinary text: always advance so the scan makes progress.
			++myStart;
			continue;
		}
		const string::iterator endTagStart = myStart;
		++myStart; // step over the '<' itself
		myStart = skipWhiteSpace(myStart, end);
		if (myStart == end || *myStart != '/') {
			// An opening tag (or truncated input); keep scanning from here.
			continue;
		}
		++myStart; // step over the '/' so only the name is compared
		const string::iterator nameEnd = skipNotSet(myStart, end, eot);
		if (no_case_compare(tag, string(myStart, nameEnd))) {
			return endTagStart;
		}
	}
	return myStart;
}


// Compare two strings for equality, ignoring case.
//
// Returns true when a and b have the same length and every pair of
// corresponding characters is equal after upper-casing; false otherwise.
// Two empty strings compare equal.
bool no_case_compare(const string &a, const string &b) {
	if (a.length() != b.length()) return false;
	for (size_t i = 0; i < a.length(); i++) {
		// Same per-character rule as no_case().  toupper() requires a value
		// representable as unsigned char, hence the casts.
		const int upperA = toupper(static_cast<unsigned char>(a[i]));
		const int upperB = toupper(static_cast<unsigned char>(b[i]));
		if (upperA != upperB) return false;
	}
	// All characters matched.
	return true;
}

// Compare two characters for equality, ignoring case.
// Returns true when a and b are identical or differ only in case.
bool no_case(const char &a, const char &b) {
	// Try it straightforwardly...
	if (a == b) return true;
	// toupper() requires a value representable as unsigned char; passing a
	// negative plain char is undefined, hence the casts.
	const int upperA = toupper(static_cast<unsigned char>(a));
	const int upperB = toupper(static_cast<unsigned char>(b));
	return upperA == upperB;
}

// vim:ai ts=4
