summary | refs | log | tree | commit | diff
path: root/ext/standard/url.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/standard/url.c')
-rw-r--r-- ext/standard/url.c 722
1 files changed, 0 insertions, 722 deletions
diff --git a/ext/standard/url.c b/ext/standard/url.c
deleted file mode 100644
index d425e985c7..0000000000
--- a/ext/standard/url.c
+++ /dev/null
@@ -1,722 +0,0 @@
-/*
- +----------------------------------------------------------------------+
- | PHP Version 5 |
- +----------------------------------------------------------------------+
- | Copyright (c) 1997-2006 The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 3.01 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available through the world-wide-web at the following url: |
- | http://www.php.net/license/3_01.txt |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- | Author: Jim Winstead <jimw@php.net> |
- +----------------------------------------------------------------------+
- */
-/* $Id$ */
-
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <sys/types.h>
-
-#include "php.h"
-
-#include "url.h"
-#include "file.h"
-#ifdef _OSD_POSIX
-#ifndef APACHE
-#error On this EBCDIC platform, PHP is only supported as an Apache module.
-#else /*APACHE*/
-#ifndef CHARSET_EBCDIC
-#define CHARSET_EBCDIC /* this machine uses EBCDIC, not ASCII! */
-#endif
-#include "ebcdic.h"
-#endif /*APACHE*/
-#endif /*_OSD_POSIX*/
-
-/* {{{ php_url_free
- * Releases every string component of a php_url that is set, then the
- * structure itself. theurl must not be NULL (it is dereferenced
- * unconditionally).
- */
-PHPAPI void php_url_free(php_url *theurl)
-{
-	if (theurl->scheme != NULL) {
-		efree(theurl->scheme);
-	}
-	if (theurl->user != NULL) {
-		efree(theurl->user);
-	}
-	if (theurl->pass != NULL) {
-		efree(theurl->pass);
-	}
-	if (theurl->host != NULL) {
-		efree(theurl->host);
-	}
-	if (theurl->path != NULL) {
-		efree(theurl->path);
-	}
-	if (theurl->query != NULL) {
-		efree(theurl->query);
-	}
-	if (theurl->fragment != NULL) {
-		efree(theurl->fragment);
-	}
-
-	/* the components are gone; now drop the container */
-	efree(theurl);
-}
-/* }}} */
-
-/* {{{ php_replace_controlchars_ex
- * Overwrites, in place, every byte of str[0 .. len) for which iscntrl()
- * is true with '_'.
- *
- * str  buffer to sanitise; may be NULL
- * len  number of bytes to examine
- *
- * Returns str, or NULL when str is NULL.
- */
-PHPAPI char *php_replace_controlchars_ex(char *str, int len)
-{
-	unsigned char *s, *e;
-
-	/* Reject NULL before forming str + len: arithmetic on a null
-	 * pointer is undefined. */
-	if (!str) {
-		return (NULL);
-	}
-
-	/* walk the buffer as unsigned char so iscntrl() never sees a
-	 * negative value */
-	s = (unsigned char *)str;
-	e = s + len;
-
-	while (s < e) {
-		if (iscntrl(*s)) {
-			*s = '_';
-		}
-		s++;
-	}
-
-	return (str);
-}
-/* }}} */
-
-/* Replaces the control characters of a NUL-terminated string in place by
- * handing it, together with its strlen(), to php_replace_controlchars_ex().
- *
- * Returns str. A NULL str yields NULL, matching the _ex variant, instead
- * of being passed to strlen().
- */
-PHPAPI char *php_replace_controlchars(char *str)
-{
-	if (!str) {
-		return NULL;
-	}
-
-	return php_replace_controlchars_ex(str, strlen(str));
-}
-
-/* Parses a NUL-terminated URL: measures it with strlen() and delegates to
- * php_url_parse_ex(), whose result is returned unchanged.
- */
-PHPAPI php_url *php_url_parse(char const *str)
-{
-	int length = strlen(str);
-
-	return php_url_parse_ex(str, length);
-}
-
-/* {{{ php_url_parse_ex
- * Splits the first `length` bytes of str into scheme, user, pass, host,
- * port, path, query and fragment and returns them in a newly allocated
- * php_url. Every string component that is stored has its control
- * characters replaced by '_' via php_replace_controlchars_ex().
- *
- * Returns NULL, after releasing what had been allocated so far, when an
- * explicit port is longer than 5 characters or when the host part is
- * empty. Otherwise the result is to be released with php_url_free().
- *
- * NOTE(review): several look-aheads (*(e + 1), *(e + 5), isdigit(*p)) and
- * the strchr() calls read past `length`; they appear to rely on str being
- * NUL-terminated - confirm against callers.
- */
-PHPAPI php_url *php_url_parse_ex(char const *str, int length)
-{
-	char port_buf[6];
-	php_url *ret = ecalloc(1, sizeof(php_url));
-	char const *s, *e, *p, *pp, *ue;
-
-	s = str;
-	ue = s + length;
-
-	/* parse scheme */
-	if ((e = memchr(s, ':', length)) && (e - s)) {
-		/* validate scheme */
-		p = s;
-		while (p < e) {
-			/* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */
-			if (!isalpha(*p) && !isdigit(*p) && *p != '+' && *p != '.' && *p != '-') {
-				/* not a scheme: the ':' may introduce a port instead */
-				if (e + 1 < ue) {
-					goto parse_port;
-				} else {
-					goto just_path;
-				}
-			}
-			p++;
-		}
-
-		if (*(e + 1) == '\0') { /* only scheme is available */
-			ret->scheme = estrndup(s, (e - s));
-			php_replace_controlchars_ex(ret->scheme, (e - s));
-			goto end;
-		}
-
-		/*
-		 * certain schemes like mailto: and zlib: may not have any / after them
-		 * this check ensures we support those.
-		 */
-		if (*(e+1) != '/') {
-			/* check if the data we get is a port; this allows us to
-			 * correctly parse things like a.com:80
-			 */
-			p = e + 1;
-			while (isdigit(*p)) {
-				p++;
-			}
-
-			if ((*p == '\0' || *p == '/') && (p - e) < 7) {
-				goto parse_port;
-			}
-
-			ret->scheme = estrndup(s, (e-s));
-			php_replace_controlchars_ex(ret->scheme, (e - s));
-
-			/* skip past "scheme:" and treat the rest as a path */
-			length -= ++e - s;
-			s = e;
-			goto just_path;
-		} else {
-			ret->scheme = estrndup(s, (e-s));
-			php_replace_controlchars_ex(ret->scheme, (e - s));
-
-			if (*(e+2) == '/') {
-				/* "scheme://": the authority starts after the two slashes */
-				s = e + 3;
-				if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
-					if (*(e + 3) == '/') {
-						/* support windows drive letters as in:
-						   file:///c:/somedir/file.txt
-						*/
-						if (*(e + 5) == ':') {
-							s = e + 4;
-						}
-						goto nohost;
-					}
-				}
-			} else {
-				if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
-					s = e + 1;
-					goto nohost;
-				} else {
-					length -= ++e - s;
-					s = e;
-					goto just_path;
-				}
-			}
-		}
-	} else if (e) { /* no scheme, look for port */
-		parse_port:
-		p = e + 1;
-		pp = p;
-
-		/* at most 5 digits fit into port_buf together with the NUL */
-		while (pp-p < 6 && isdigit(*pp)) {
-			pp++;
-		}
-
-		if (pp-p < 6 && (*pp == '/' || *pp == '\0')) {
-			memcpy(port_buf, p, (pp-p));
-			port_buf[pp-p] = '\0';
-			ret->port = atoi(port_buf);
-		} else {
-			goto just_path;
-		}
-	} else {
-		just_path:
-		ue = s + length;
-		goto nohost;
-	}
-
-	/* e becomes the end of the authority: the first '/', else the first
-	 * '?', else the first '#', else the end of the input */
-	e = ue;
-
-	if (!(p = memchr(s, '/', (ue - s)))) {
-		if ((p = memchr(s, '?', (ue - s)))) {
-			e = p;
-		} else if ((p = memchr(s, '#', (ue - s)))) {
-			e = p;
-		}
-	} else {
-		e = p;
-	}
-
-	/* check for login and password */
-	if ((p = memchr(s, '@', (e-s)))) {
-		if ((pp = memchr(s, ':', (p-s)))) {
-			if ((pp-s) > 0) {
-				ret->user = estrndup(s, (pp-s));
-				php_replace_controlchars_ex(ret->user, (pp - s));
-			}
-
-			pp++;
-			if (p-pp > 0) {
-				ret->pass = estrndup(pp, (p-pp));
-				php_replace_controlchars_ex(ret->pass, (p-pp));
-			}
-		} else {
-			ret->user = estrndup(s, (p-s));
-			php_replace_controlchars_ex(ret->user, (p-s));
-		}
-
-		s = p + 1;
-	}
-
-	/* check for port */
-	if (*s == '[' && *(e-1) == ']') {
-		/* Short circuit portscan,
-		   we're dealing with an
-		   IPv6 embedded address */
-		p = s;
-	} else {
-		/* memrchr is a GNU specific extension
-		   Emulate for wide compatibility.
-		   The bounds test comes first so that s[-1] is never read. */
-		for(p = e; p >= s && *p != ':'; p--);
-	}
-
-	if (p >= s && *p == ':') {
-		if (!ret->port) {
-			p++;
-			if (e-p > 5) { /* port cannot be longer than 5 characters */
-				STR_FREE(ret->scheme);
-				STR_FREE(ret->user);
-				STR_FREE(ret->pass);
-				efree(ret);
-				return NULL;
-			} else if (e - p > 0) {
-				memcpy(port_buf, p, (e-p));
-				port_buf[e-p] = '\0';
-				ret->port = atoi(port_buf);
-			}
-			/* step back onto the ':' so that it marks the end of the host */
-			p--;
-		}
-	} else {
-		p = e;
-	}
-
-	/* check if we have a valid host, if we don't reject the string as url */
-	if ((p-s) < 1) {
-		STR_FREE(ret->scheme);
-		STR_FREE(ret->user);
-		STR_FREE(ret->pass);
-		efree(ret);
-		return NULL;
-	}
-
-	ret->host = estrndup(s, (p-s));
-	php_replace_controlchars_ex(ret->host, (p - s));
-
-	/* nothing follows the authority */
-	if (e == ue) {
-		return ret;
-	}
-
-	s = e;
-
-	nohost:
-
-	if ((p = memchr(s, '?', (ue - s)))) {
-		pp = strchr(s, '#');
-
-		/* a '#' in front of the '?' ends the path instead of the '?' */
-		if (pp && pp < p) {
-			p = pp;
-			pp = strchr(pp+2, '#');
-		}
-
-		if (p - s) {
-			ret->path = estrndup(s, (p-s));
-			php_replace_controlchars_ex(ret->path, (p - s));
-		}
-
-		if (pp) {
-			if (pp - ++p) {
-				ret->query = estrndup(p, (pp-p));
-				php_replace_controlchars_ex(ret->query, (pp - p));
-			}
-			p = pp;
-			goto label_parse;
-		} else if (++p - ue) {
-			ret->query = estrndup(p, (ue-p));
-			php_replace_controlchars_ex(ret->query, (ue - p));
-		}
-	} else if ((p = memchr(s, '#', (ue - s)))) {
-		if (p - s) {
-			ret->path = estrndup(s, (p-s));
-			php_replace_controlchars_ex(ret->path, (p - s));
-		}
-
-		label_parse:
-		/* p is on the '#'; the fragment is whatever follows it */
-		p++;
-
-		if (ue - p) {
-			ret->fragment = estrndup(p, (ue-p));
-			php_replace_controlchars_ex(ret->fragment, (ue - p));
-		}
-	} else {
-		ret->path = estrndup(s, (ue-s));
-		php_replace_controlchars_ex(ret->path, (ue - s));
-	}
-end:
-	return ret;
-}
-/* }}} */
-
-/* {{{ proto mixed parse_url(string url, [int url_component])
-   Parse a URL and return its components.
-   Without url_component (or with a negative one) an array of the
-   components that are present is returned. With a PHP_URL_* identifier
-   only that component is returned, and the return value is left untouched
-   when the component is absent. Emits a warning and returns false when the
-   URL cannot be parsed or the identifier is unknown. */
-PHP_FUNCTION(parse_url)
-{
-	char *url;
-	int url_len;
-	long component = -1;
-	php_url *parsed;
-
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &url, &url_len, &component) == FAILURE) {
-		return;
-	}
-
-	parsed = php_url_parse_ex(url, url_len);
-	if (!parsed) {
-		php_error_docref1(NULL TSRMLS_CC, url, E_WARNING, "Unable to parse url");
-		RETURN_FALSE;
-	}
-
-	if (component > -1) {
-		/* a single component was asked for */
-		switch (component) {
-			case PHP_URL_SCHEME:
-				if (parsed->scheme != NULL) {
-					RETVAL_STRING(parsed->scheme, 1);
-				}
-				break;
-			case PHP_URL_HOST:
-				if (parsed->host != NULL) {
-					RETVAL_STRING(parsed->host, 1);
-				}
-				break;
-			case PHP_URL_PORT:
-				if (parsed->port != 0) {
-					RETVAL_LONG(parsed->port);
-				}
-				break;
-			case PHP_URL_USER:
-				if (parsed->user != NULL) {
-					RETVAL_STRING(parsed->user, 1);
-				}
-				break;
-			case PHP_URL_PASS:
-				if (parsed->pass != NULL) {
-					RETVAL_STRING(parsed->pass, 1);
-				}
-				break;
-			case PHP_URL_PATH:
-				if (parsed->path != NULL) {
-					RETVAL_STRING(parsed->path, 1);
-				}
-				break;
-			case PHP_URL_QUERY:
-				if (parsed->query != NULL) {
-					RETVAL_STRING(parsed->query, 1);
-				}
-				break;
-			case PHP_URL_FRAGMENT:
-				if (parsed->fragment != NULL) {
-					RETVAL_STRING(parsed->fragment, 1);
-				}
-				break;
-			default:
-				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid url component identifier %ld.", component);
-				RETVAL_FALSE;
-		}
-	} else {
-		/* no component selected: return everything that was found */
-		array_init(return_value);
-
-		if (parsed->scheme != NULL) {
-			add_assoc_string(return_value, "scheme", parsed->scheme, 1);
-		}
-		if (parsed->host != NULL) {
-			add_assoc_string(return_value, "host", parsed->host, 1);
-		}
-		if (parsed->port != 0) {
-			add_assoc_long(return_value, "port", parsed->port);
-		}
-		if (parsed->user != NULL) {
-			add_assoc_string(return_value, "user", parsed->user, 1);
-		}
-		if (parsed->pass != NULL) {
-			add_assoc_string(return_value, "pass", parsed->pass, 1);
-		}
-		if (parsed->path != NULL) {
-			add_assoc_string(return_value, "path", parsed->path, 1);
-		}
-		if (parsed->query != NULL) {
-			add_assoc_string(return_value, "query", parsed->query, 1);
-		}
-		if (parsed->fragment != NULL) {
-			add_assoc_string(return_value, "fragment", parsed->fragment, 1);
-		}
-	}
-
-	/* every value was copied (duplicate flag 1), so the parse result can go */
-	php_url_free(parsed);
-}
-/* }}} */
-
-/* {{{ php_htoi
- * Converts the two hexadecimal digits s[0] and s[1] (either case) into the
- * byte value they spell, s[0] being the high nibble. The characters are
- * not validated here; the callers in this file check them with isxdigit()
- * first.
- */
-static int php_htoi(char *s)
-{
-	int value = 0;
-	int i;
-
-	for (i = 0; i < 2; i++) {
-		int c = ((unsigned char *)s)[i];
-
-		if (isupper(c)) {
-			c = tolower(c);
-		}
-		/* shift the previous nibble up and append this one */
-		value = value * 16 + ((c >= '0' && c <= '9') ? c - '0' : c - 'a' + 10);
-	}
-
-	return (value);
-}
-/* }}} */
-
-/* rfc1738:
-
- ...The characters ";",
- "/", "?", ":", "@", "=" and "&" are the characters which may be
- reserved for special meaning within a scheme...
-
- ...Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
- reserved characters used for their reserved purposes may be used
- unencoded within a URL...
-
- For added safety, we only leave -_. unencoded.
- */
-
-/* Upper-case hexadecimal digits, indexed by nibble value (0-15); the
- * encoders below use it to emit the two digits of a %XX escape. */
-static unsigned char hexchars[] = "0123456789ABCDEF";
-
-/* {{{ php_url_encode
- * Returns a newly allocated, NUL-terminated copy of s[0 .. len) in which a
- * space becomes '+', alphanumerics and "-_." are kept, and every other
- * byte becomes %XX with upper-case hex digits. When new_length is not
- * NULL it receives the length of the result without the terminator.
- */
-PHPAPI char *php_url_encode(char const *s, int len, int *new_length)
-{
-	register unsigned char c;
-	unsigned char *out, *buf;
-	unsigned char const *in, *stop;
-
-	in = s;
-	stop = s + len;
-	/* sized for the worst case: three output bytes per input byte, plus one */
-	buf = out = (unsigned char *) safe_emalloc(3, len, 1);
-
-	for (; in < stop; in++) {
-		c = *in;
-
-		if (c == ' ') {
-			*out++ = '+';
-			continue;
-		}
-
-#ifndef CHARSET_EBCDIC
-		if ((c < '0' && c != '-' && c != '.') ||
-			(c < 'A' && c > '9') ||
-			(c > 'Z' && c < 'a' && c != '_') ||
-			(c > 'z')) {
-			*out++ = '%';
-			*out++ = hexchars[c >> 4];
-			*out++ = hexchars[c & 15];
-			continue;
-		}
-#else /*CHARSET_EBCDIC*/
-		/* Allow only alphanumeric chars and '_', '-', '.'; escape the rest */
-		if (!isalnum(c) && strchr("_-.", c) == NULL) {
-			*out++ = '%';
-			*out++ = hexchars[os_toascii[c] >> 4];
-			*out++ = hexchars[os_toascii[c] & 15];
-			continue;
-		}
-#endif /*CHARSET_EBCDIC*/
-
-		/* safe character: copy through unchanged */
-		*out++ = c;
-	}
-	*out = 0;
-	if (new_length) {
-		*new_length = out - buf;
-	}
-	return (char *) buf;
-}
-/* }}} */
-
-/* {{{ proto string urlencode(string str)
-   URL-encodes string (userland wrapper around php_url_encode) */
-PHP_FUNCTION(urlencode)
-{
-	char *input;
-	int input_len;
-	char *encoded;
-	int encoded_len;
-
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &input, &input_len) == FAILURE) {
-		return;
-	}
-
-	encoded = php_url_encode(input, input_len, &encoded_len);
-
-	/* last argument 0: the freshly allocated buffer is returned without duplicating it */
-	RETURN_STRINGL(encoded, encoded_len, 0);
-}
-/* }}} */
-
-/* {{{ proto string urldecode(string str)
-   Decodes URL-encoded string (userland wrapper around php_url_decode) */
-PHP_FUNCTION(urldecode)
-{
-	char *input;
-	int input_len;
-	char *decoded;
-	int decoded_len;
-
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &input, &input_len) == FAILURE) {
-		return;
-	}
-
-	/* php_url_decode() rewrites its argument, so decode a private copy */
-	decoded = estrndup(input, input_len);
-	decoded_len = php_url_decode(decoded, input_len);
-
-	RETURN_STRINGL(decoded, decoded_len, 0);
-}
-/* }}} */
-
-/* {{{ php_url_decode
- * Decodes str[0 .. len) in place: '+' becomes a space and "%XX" (two hex
- * digits) becomes the byte it spells. A '%' that is not followed by two
- * hex digits inside the buffer is copied through literally. The result
- * is NUL-terminated.
- *
- * Returns the length of the decoded string.
- */
-PHPAPI int php_url_decode(char *str, int len)
-{
-	char *dest = str;
-	char *data = str;
-
-	while (len--) {
-		if (*data == '+') {
-			*dest = ' ';
-		}
-		/* len has already been decremented, so it counts the bytes after
-		 * the '%'. The casts to unsigned char keep isxdigit() defined for
-		 * bytes >= 0x80 on platforms where plain char is signed. */
-		else if (*data == '%' && len >= 2 && isxdigit((unsigned char) *(data + 1))
-				 && isxdigit((unsigned char) *(data + 2))) {
-#ifndef CHARSET_EBCDIC
-			*dest = (char) php_htoi(data + 1);
-#else
-			/* index with an unsigned value: a (char) cast could go negative */
-			*dest = os_toebcdic[(unsigned char) php_htoi(data + 1)];
-#endif
-			data += 2;
-			len -= 2;
-		} else {
-			*dest = *data;
-		}
-		data++;
-		dest++;
-	}
-	*dest = '\0';
-	return dest - str;
-}
-/* }}} */
-
-/* {{{ php_raw_url_encode
- * Returns a newly allocated, NUL-terminated copy of s[0 .. len) in which
- * alphanumerics and "-_." are kept and every other byte, including the
- * space, becomes %XX with upper-case hex digits. When new_length is not
- * NULL it receives the length of the result without the terminator.
- */
-PHPAPI char *php_raw_url_encode(char const *s, int len, int *new_length)
-{
-	register int x, y;
-	unsigned char *str;
-
-	/* sized for the worst case: three output bytes per input byte, plus one */
-	str = (unsigned char *) safe_emalloc(3, len, 1);
-	for (x = 0, y = 0; len--; x++, y++) {
-		str[y] = (unsigned char) s[x];
-#ifndef CHARSET_EBCDIC
-		if ((str[y] < '0' && str[y] != '-' && str[y] != '.') ||
-			(str[y] < 'A' && str[y] > '9') ||
-			(str[y] > 'Z' && str[y] < 'a' && str[y] != '_') ||
-			(str[y] > 'z')) {
-			str[y++] = '%';
-			str[y++] = hexchars[(unsigned char) s[x] >> 4];
-			str[y] = hexchars[(unsigned char) s[x] & 15];
-#else /*CHARSET_EBCDIC*/
-		/* escape everything that is neither alphanumeric nor one of "_-.";
-		 * the test used to be "!= NULL", which escaped only those three */
-		if (!isalnum(str[y]) && strchr("_-.", str[y]) == NULL) {
-			str[y++] = '%';
-			str[y++] = hexchars[os_toascii[(unsigned char) s[x]] >> 4];
-			str[y] = hexchars[os_toascii[(unsigned char) s[x]] & 15];
-#endif /*CHARSET_EBCDIC*/
-		}
-	}
-	str[y] = '\0';
-	if (new_length) {
-		*new_length = y;
-	}
-	return ((char *) str);
-}
-/* }}} */
-
-/* {{{ proto string rawurlencode(string str)
-   URL-encodes string (userland wrapper around php_raw_url_encode) */
-PHP_FUNCTION(rawurlencode)
-{
-	char *input;
-	int input_len;
-	char *encoded;
-	int encoded_len;
-
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &input, &input_len) == FAILURE) {
-		return;
-	}
-
-	encoded = php_raw_url_encode(input, input_len, &encoded_len);
-
-	/* last argument 0: the freshly allocated buffer is returned without duplicating it */
-	RETURN_STRINGL(encoded, encoded_len, 0);
-}
-/* }}} */
-
-/* {{{ proto string rawurldecode(string str)
-   Decodes URL-encoded string (userland wrapper around php_raw_url_decode) */
-PHP_FUNCTION(rawurldecode)
-{
-	char *input;
-	int input_len;
-	char *decoded;
-	int decoded_len;
-
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &input, &input_len) == FAILURE) {
-		return;
-	}
-
-	/* php_raw_url_decode() rewrites its argument, so decode a private copy */
-	decoded = estrndup(input, input_len);
-	decoded_len = php_raw_url_decode(decoded, input_len);
-
-	RETURN_STRINGL(decoded, decoded_len, 0);
-}
-/* }}} */
-
-/* {{{ php_raw_url_decode
- * Decodes str[0 .. len) in place: "%XX" (two hex digits) becomes the byte
- * it spells; everything else, including '+', is copied through unchanged.
- * The result is NUL-terminated.
- *
- * Returns the length of the decoded string.
- */
-PHPAPI int php_raw_url_decode(char *str, int len)
-{
-	char *dest = str;
-	char *data = str;
-
-	while (len--) {
-		/* len has already been decremented, so it counts the bytes after
-		 * the '%'. The casts to unsigned char keep isxdigit() defined for
-		 * bytes >= 0x80 on platforms where plain char is signed. */
-		if (*data == '%' && len >= 2 && isxdigit((unsigned char) *(data + 1))
-			&& isxdigit((unsigned char) *(data + 2))) {
-#ifndef CHARSET_EBCDIC
-			*dest = (char) php_htoi(data + 1);
-#else
-			/* index with an unsigned value: a (char) cast could go negative */
-			*dest = os_toebcdic[(unsigned char) php_htoi(data + 1)];
-#endif
-			data += 2;
-			len -= 2;
-		} else {
-			*dest = *data;
-		}
-		data++;
-		dest++;
-	}
-	*dest = '\0';
-	return dest - str;
-}
-/* }}} */
-
-/* {{{ proto array get_headers(string url [, int format])
-   fetches all the headers sent by the server in response to a HTTP request.
-   With format == 0 (the default) every header line is appended as-is.
-   Otherwise "Name: value" lines are stored under their name, repeated
-   names are collected into an array, and lines without a ':' are still
-   appended as-is. Returns false when the stream cannot be opened or
-   carries no header array. */
-PHP_FUNCTION(get_headers)
-{
-	char *url;
-	int url_len;
-	php_stream_context *context;
-	php_stream *stream;
-	zval **prev_val, **hdr = NULL;
-	HashPosition pos;
-	long format = 0;
-
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &url, &url_len, &format) == FAILURE) {
-		return;
-	}
-	/* use the default stream context, creating it on first use */
-	context = FG(default_context) ? FG(default_context) : (FG(default_context) = php_stream_context_alloc());
-
-	if (!(stream = php_stream_open_wrapper_ex(url, "r", REPORT_ERRORS | STREAM_USE_URL | STREAM_ONLY_GET_HEADERS, NULL, context))) {
-		RETURN_FALSE;
-	}
-
-	/* The loop below walks wrapperdata as a hash; bail out instead of
-	 * dereferencing it when the wrapper supplied none or it is not an array. */
-	if (!stream->wrapperdata || Z_TYPE_P(stream->wrapperdata) != IS_ARRAY) {
-		php_stream_close(stream);
-		RETURN_FALSE;
-	}
-
-	array_init(return_value);
-
-	zend_hash_internal_pointer_reset_ex(HASH_OF(stream->wrapperdata), &pos);
-	while (zend_hash_get_current_data_ex(HASH_OF(stream->wrapperdata), (void**)&hdr, &pos) != FAILURE) {
-		/* only string entries can be treated as header lines */
-		if (!hdr || Z_TYPE_PP(hdr) != IS_STRING) {
-			zend_hash_move_forward_ex(HASH_OF(stream->wrapperdata), &pos);
-			continue;
-		}
-
-		if (!format) {
-no_name_header:
-			add_next_index_stringl(return_value, Z_STRVAL_PP(hdr), Z_STRLEN_PP(hdr), 1);
-		} else {
-			char c;
-			char *s, *p;
-
-			if ((p = strchr(Z_STRVAL_PP(hdr), ':'))) {
-				/* temporarily terminate the header at the ':' so that its
-				 * name can serve as a key; the byte is restored below */
-				c = *p;
-				*p = '\0';
-				s = p + 1;
-				while (isspace((int)*(unsigned char *)s)) {
-					s++;
-				}
-
-				/* the key length passed here includes the NUL written at *p */
-				if (zend_hash_find(HASH_OF(return_value), Z_STRVAL_PP(hdr), (p - Z_STRVAL_PP(hdr) + 1), (void **) &prev_val) == FAILURE) {
-					add_assoc_stringl_ex(return_value, Z_STRVAL_PP(hdr), (p - Z_STRVAL_PP(hdr) + 1), s, (Z_STRLEN_PP(hdr) - (s - Z_STRVAL_PP(hdr))), 1);
-				} else { /* some headers may occur more than once, therefore we need to remake the string into an array */
-					convert_to_array(*prev_val);
-					add_next_index_stringl(*prev_val, s, (Z_STRLEN_PP(hdr) - (s - Z_STRVAL_PP(hdr))), 1);
-				}
-
-				*p = c;
-			} else {
-				goto no_name_header;
-			}
-		}
-		zend_hash_move_forward_ex(HASH_OF(stream->wrapperdata), &pos);
-	}
-
-	php_stream_close(stream);
-}
-/* }}} */
-
-/*
- * Local variables:
- * tab-width: 4
- * c-basic-offset: 4
- * End:
- * vim600: sw=4 ts=4 fdm=marker
- * vim<600: sw=4 ts=4
- */