doubango/tinyHTTP/ragel/thttp_parser_url.rl
c732d49e
 /*
 * Copyright (C) 2010-2015 Mamadou Diop.
 *	
 * This file is part of Open Source Doubango Framework.
 *
 * DOUBANGO is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *	
 * DOUBANGO is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *	
 * You should have received a copy of the GNU General Public License
 * along with DOUBANGO.
 *
 */
 /**@file thttp_parser_url.c
  * @brief HTTP/HTTPS URL parser.
  */
 #include "tinyhttp/parsers/thttp_parser_url.h"
 
 #include "tsk_string.h"
 #include "tsk_memory.h"
 #include "tsk_debug.h"
 
 /***********************************
 *	Ragel state machine.
 */
 %%{
 	machine thttp_machine_parser_url;
 
 	# Includes
 	include thttp_machine_utils "./ragel/thttp_machine_utils.rl";
 			
 	action tag{
 		tag_start = p;
 	}
 
 	#/* Sets URL type */
 	action is_http { url->scheme = tsk_strdup("http"), url->type = thttp_url_http; }
 	action is_https { url->scheme = tsk_strdup("https"), url->type = thttp_url_https; }
 
 	#/* Sets HOST type */
 	action is_ipv4 { url->host_type = url->host_type = thttp_host_ipv4; }
 	action is_ipv6 { url->host_type = url->host_type = thttp_host_ipv6; }
 	action is_hostname { url->host_type = url->host_type = thttp_host_hostname; }
 
 	action parse_host{
 		TSK_PARSER_SET_STRING(url->host);
 	}
 
 	action parse_port{
 		have_port = 1;
 		TSK_PARSER_SET_INT(url->port);
 	}
 
 	action parse_hpath{
 		TSK_PARSER_SET_STRING(url->hpath);
 	}
 
 	action parse_search{
 		TSK_PARSER_SET_STRING(url->search);
 	}
 
 	action eob{
 	}
 
 	#// RFC 1738: "http://" hostport [ "/" hpath [ "?" search ]]
 	#// FIXME: hpath is no optional (see above) but in my def. I use it as opt (any*).
 
 	search = any* >tag %parse_search;
 	hpath = any* >tag %parse_hpath;
 	port = DIGIT+ >tag %parse_port;
 	myhost = ((IPv6reference >is_ipv6) | (IPv4address >is_ipv4) | (hostname >is_hostname)) >tag %parse_host;
 	hostport = myhost ( ":" port )?;
 	main := ( (("http:"i>tag %is_http | "https:"i>tag %is_https) "//")? hostport? :>("/" hpath :>("?" search)?)? ) @eob;
 	#main := ( hostport? :>("/" hpath :>("?" search)?)? ) @eob;
 	
 }%%
 
 /**@ingroup thttp_url_group
 * Parses a HTTP/HTTPS url.
 * @param urlstring A pointer to a valid url string. If the port is missing, then it's default value will be 443 if
 * the scheme is 'https' and 80 otherwise.<br>
 * @param length The length of the url string.
 * @retval A well-defined object representing the url string.
 *
 * @code
 * thttp_url_t* url = thttp_url_parse("http://www.google.com", tsk_strlen("http://www.google.com"));
 * @endcode
 *
 * @sa @ref thttp_url_tostring<br>@ref thttp_url_serialize
 **/
 thttp_url_t *thttp_url_parse(const char *urlstring, tsk_size_t length)
 {
 	tsk_bool_t have_port = tsk_false;
 	int cs = 0;
 	const char *p = urlstring;
 	const char *pe = p + length;
 	const char *eof = pe;
 
 	const char *ts = 0, *te = 0;
 	int act =0;
 
 	thttp_url_t *url = thttp_url_create(thttp_url_unknown);
 	
 	const char *tag_start = 0;
 	
 	TSK_RAGEL_DISABLE_WARNINGS_BEGIN()
 	%%write data;
 	(void)(ts);
 	(void)(te);
 	(void)(act);
 	(void)(eof);
 	(void)(thttp_machine_parser_url_first_final);
 	(void)(thttp_machine_parser_url_error);
 	(void)(thttp_machine_parser_url_en_main);
 	%%write init;
 	%%write exec;
 	TSK_RAGEL_DISABLE_WARNINGS_END()
 	
 	if( cs < %%{ write first_final; }%% ){
 		TSK_DEBUG_ERROR("Failed to parse HTTP/HTTPS URL: '%.*s'", length, urlstring);
 		TSK_OBJECT_SAFE_FREE(url);
 	}
 	else if(!have_port){
 		if(url->type == thttp_url_https){
 			url->port = 443;
 		}
 		else{
 			url->port = 80;
 		}
 	}
 	
 	return url;
 }