<?php
/** HTTP client
 * Reconstructed from the patch "HTTPClient : first version"
 * (Dominique Fournier, 2018-12-18, httpclient.php).
 */

/** This program gets a HTTP page from a site so its content can be examined.
 * It stores the cookies sent by the server, sends them back when they match
 * the requested URL, follows the redirects and sends form data
 * (in the query string for GET, in the body for the other methods).
 */
class Httpclient
{
  //////////////////////////
  ////   PROPERTIES     ////
  //////////////////////////
  // {{{
  /** The debug mode
   */
  private $debug = null;

  /** The URL to use
   */
  private $url = "";

  /** The cookies, one Netscape cookies.txt line (tab separated) per entry
   */
  private $cookies = array ();

  /** Store the session cookies when analyzing the answer of the server
   */
  private $cookiesSession = true;

  /** The headersReceived
   */
  private $headersReceived = array ();

  /** The headers sent to the server by the last connect ()
   */
  private $headersSent = array ();

  /** The Method used to communicate : GET, POST, HEAD, PUT, DELETE
   */
  private $method = "GET";

  /** The TCP port used for connection
   */
  private $port = null;

  /** The TCPClient object
   */
  private $tcpclient = null;

  /** The maximum size allowed for a page read by getPage ()
   */
  private $maxsize;

  /** The returned HTTP code from the server
   */
  private $httpCode = null;

  /** The body size (or chunk size in chunked mode) staying to read
   */
  private $bodySize = null;

  /** The content method to get the content : "chunked", "Content-Length" or
   * false if there is no body to read
   */
  private $contentMethod = false;

  /** Follow X redirects before abort
   */
  private $redirectMaxCount = 10;

  /** The actual number of redirect
   */
  private $redirectCount = 0;

  /** The form data to send
   * Will be of type array ("field" => "value")
   * If value is like "@/tmp/file", use the /tmp/file as content
   */
  private $formData = array ();

  /** The timeout in second before expiring the connection
   */
  private $timeout = 30;
  // }}}

  /** The constructor
   * Set the maxsize to the half of the PHP memory_limit, or to 8G if there is
   * no memory limit.
   */
  public function __construct ()
  // {{{
  {
    // memory_limit can be "128M", "1g", "-1" (no limit)... The suffixes are
    // case insensitive in php.ini
    $maxsize = str_ireplace (array ('G', 'M', 'K'),
                             array ('000000000', '000000', '000'),
                             trim ((string) ini_get ('memory_limit')));
    // floatval never raises on a non numeric value (PHP 8 does in "/")
    $maxsize = intval (floatval ($maxsize) / 2);
    // If no maxsize limit, set to 8G
    if ($maxsize <= 0)
      $maxsize = 8000000000;
    $this->maxsize = $maxsize;
  }
  // }}}

  /////////////////////////////////
  ////   GETTERS / SETTERS     ////
  /////////////////////////////////
  /** Set / Get the url
   * @param string|null $url Set / Get the url
   * @return string|Httpclient The url in get mode, $this in set mode
   */
  public function url ($url = null)
  // {{{
  {
    if ($url === null)
      return $this->url;
    if (! is_string ($url))
      throw new \Exception ("Invalid url to set : not a string", 406);
    $this->url = $url;
    return $this;
  }
  // }}}

  /** Set / Get the cookies stored
   * @param array|null $cookies Set / Get the cookies
   * @return array|Httpclient The cookies in get mode, $this in set mode
   */
  public function cookies ($cookies = null)
  // {{{
  {
    if ($cookies === null)
      return $this->cookies;
    if (! is_array ($cookies))
      throw new \Exception ("Invalid cookies to set : not an array", 406);
    $this->cookies = $cookies;
    return $this;
  }
  // }}}

  /** Set / Get the method
   * @param string|null $method Set / Get the method
   * @return string|Httpclient The method in get mode, $this in set mode
   */
  public function method ($method = null)
  // {{{
  {
    if ($method === null)
      return $this->method;
    if (! is_string ($method))
      throw new \Exception ("Invalid method to set : not an string", 406);
    if (! in_array ($method, array ("GET", "POST", "PUT", "DELETE", "HEAD"),
                    true))
      throw new \Exception ("Invalid method to set : not in list", 406);
    $this->method = $method;
    return $this;
  }
  // }}}

  /** Get the headersReceived after the page was get
   * The key 0 contains the status line, the other keys are the header names
   * as sent by the server (the value is an array if the header is repeated)
   */
  public function headersReceived ()
  // {{{
  {
    return $this->headersReceived;
  }
  // }}}

  /** Get the port used for connection
   */
  public function port ()
  // {{{
  {
    return $this->port;
  }
  // }}}

  /** Set / Get the maximum maxsize allowed
   * @param integer|null $maxsize The maxsize in bytes
   * @return integer|Httpclient The maxsize in get mode, $this in set mode
   */
  public function maxsize ($maxsize = null)
  // {{{
  {
    if ($maxsize === null)
      return $this->maxsize;
    $this->maxsize = intval ($maxsize);
    return $this;
  }
  // }}}

  /** Get the HTTP Return code from connection
   */
  public function httpCode ()
  // {{{
  {
    return $this->httpCode;
  }
  // }}}

  /** Set / Get the debug mode
   * @param boolean|null $debug The debug value to set or get
   * @return boolean|null|Httpclient The debug mode in get mode, $this in set
   * mode
   */
  public function debug ($debug = null)
  // {{{
  {
    if ($debug === null)
      return $this->debug;
    $this->debug = !! ($debug);
    return $this;
  }
  // }}}

  /** Set / Get the form Data
   * Will be of type array ("field" => "value")
   * If value is like "@/tmp/file", use the /tmp/file as content
   * @param array|null $formData The data to send to the server
   * @return array|Httpclient The form data in get mode, $this in set mode
   */
  public function formData ($formData = null)
  // {{{
  {
    if ($formData === null)
      return $this->formData;
    if (! is_array ($formData))
      throw new \Exception ("Invalid form data provided : not an array", 406);
    $this->formData = $formData;
    return $this;
  }
  // }}}

  /** Get / Set the Store of session cookies when analyzing the answer of the
   * server
   * @param boolean|null $cookiesSession Allow to store the session cookies
   * @return boolean|Httpclient The state in get mode, $this in set mode
   */
  public function cookiesSession ($cookiesSession = null)
  // {{{
  {
    if ($cookiesSession === null)
      return $this->cookiesSession;
    $this->cookiesSession = !! $cookiesSession;
    return $this;
  }
  // }}}

  /** Get / Set the maximum number of redirect to follow before aborting
   * @param integer|null $redirectMaxCount The maximum number of redirect
   * before exception
   * @return integer|Httpclient The maximum in get mode, $this in set mode
   */
  public function redirectMaxCount ($redirectMaxCount = null)
  // {{{
  {
    if ($redirectMaxCount === null)
      return $this->redirectMaxCount;
    $this->redirectMaxCount = intval ($redirectMaxCount);
    return $this;
  }
  // }}}

  /** Get / Set the actual number of redirect
   * @param integer|null $redirectCount The actual number of redirect
   * @return integer|Httpclient The counter in get mode, $this in set mode
   */
  public function redirectCount ($redirectCount = null)
  // {{{
  {
    if ($redirectCount === null)
      return $this->redirectCount;
    $this->redirectCount = intval ($redirectCount);
    return $this;
  }
  // }}}

  /** Get / Set the timeout in second before expiring the connection
   * 30s by default
   * @param integer|null $timeout The timeout value
   * @return integer|Httpclient The timeout in get mode, $this in set mode
   */
  public function timeout ($timeout = null)
  // {{{
  {
    if ($timeout === null)
      return $this->timeout;
    $this->timeout = intval ($timeout);
    return $this;
  }
  // }}}

  //////////////////////////////////
  ////   THE ACTIVE METHODS     ////
  //////////////////////////////////
  /** Get the page
   * Will fill the headersReceived, cookies and port properties.
   * The redirects (301, 302, 303, 307, 308) are followed until
   * redirectMaxCount is reached.
   * This will fill all the RAM if the page is too big. For big files, use
   * $httpclient->url ($url)
   *            ->connect () ;
   * while (($content = $httpclient->read ()) !== "") {}
   * $httpclient->disconnect ();
   * If no maxsize limit is set, limit the download to 8G
   * @param string $url The URL to get
   * @param array|null $ssloptions The SSL options (stream_context_set_option)
   * @return string the page body
   * @throws \Exception if the page exceeds maxsize (500), if a redirect has
   * no Location or if there are too many redirects (406)
   */
  public function getPage ($url, $ssloptions = array ())
  // {{{
  {
    $this->method ("GET");
    $this->url ($url);
    $this->connect ($ssloptions);
    $content = "";
    while (true)
    {
      $tmp = $this->read (1000000);
      // Compare strictly : a chunk containing only "0" is valid data and must
      // not stop the loop
      if ($tmp === "" || $tmp === false || $tmp === null)
        break;
      $content .= $tmp;
      if (strlen ($content) > $this->maxsize)
      {
        $this->disconnect ();
        throw new \Exception ("File to get exceeded maxsize", 500);
      }
    }
    $this->disconnect ();
    if (in_array ($this->httpCode, array (301, 302, 303, 307, 308), true))
    {
      // The header names are case insensitive
      $locations = self::headerValues ($this->headersReceived, "Location");
      if (empty ($locations))
      {
        $this->redirectCount = 0;
        throw new \Exception ("Redirect without location provided", 406);
      }
      $this->redirectCount++;
      if ($this->redirectCount > $this->redirectMaxCount)
      {
        // Reset the counter, so the object can be used for another page
        $this->redirectCount = 0;
        throw new \Exception ("Redirect exceed maximum limit", 406);
      }
      try
      {
        $content = $this->getPage ($this->redirectUrl ($locations[0]),
                                   $ssloptions);
      }
      catch (\Exception $e)
      {
        $this->redirectCount = 0;
        throw $e;
      }
    }
    $this->redirectCount = 0;
    return $content;
  }
  // }}}

  /** Init the connection to URL
   * Send the request (with the stored cookies matching the URL and the form
   * data) then read the headers of the answer.
   * Will fill the headersReceived, cookies and port properties.
   * @param array|null $ssloptions The SSL options (stream_context_set_option)
   * @return null
   * @throws \Exception if the URL is invalid or if a form data file is
   * missing (406), if the answer of the server can not be used (500)
   */
  public function connect ($ssloptions = array ())
  // {{{
  {
    $this->headersReceived = array ();
    $this->bodySize = null;
    $this->httpCode = null;
    $this->contentMethod = false;
    if ($this->url === "")
      throw new \Exception ("No URL set to connect", 406);
    // Manage the URL (and the parameters in GET method)
    // {{{
    $parseURL = parse_url ($this->url);
    // parse_url returns false on seriously malformed URL
    if (! is_array ($parseURL) || ! key_exists ("scheme", $parseURL))
      throw new \Exception ("Scheme must be set to http or https", 406);
    $scheme = strtolower ($parseURL["scheme"]);
    if ($scheme !== "http" && $scheme !== "https")
      throw new \Exception ("Scheme must be http or https only", 406);
    if (! key_exists ("host", $parseURL))
      throw new \Exception ("No host provided to URL", 406);
    $defaultPort = ($scheme === "https") ? 443 : 80;
    $this->port = key_exists ("port", $parseURL) ? $parseURL["port"] :
                                                   $defaultPort;
    $path = key_exists ("path", $parseURL) ? $parseURL["path"] : "/";
    $query = key_exists ("query", $parseURL) ? $parseURL["query"] : "";
    if ($this->method === "GET" && ! empty ($this->formData))
    {
      // In GET method, the form data are added to the query string
      $encoded = $this->formDataEncode ();
      $query = ($query === "") ? $encoded : $query."&".$encoded;
    }
    if ($query !== "")
      $path .= "?".$query;
    // The fragment is never sent to the server : it is not part of the
    // request-target
    // }}}

    // Prepare the headers to be sent
    // {{{
    $host = $parseURL["host"];
    // The port must be provided in Host header if it is not the default one
    if ($this->port !== $defaultPort)
      $host .= ":".$this->port;
    $body = "";
    $this->headersSent = array ();
    $this->headersSent[] = $this->method." $path HTTP/1.1";
    $this->headersSent[] = "Host: $host";
    $this->headersSent[] = "Accept: text/html";
    $this->headersSent[] = "Connection: keep-alive";
    $cookies = $this->cookieToSend ($this->url);
    if (! empty ($cookies))
      $this->headersSent[] = "Cookie: ".implode ("; ", $cookies);
    if ($this->method !== "GET" && ! empty ($this->formData))
    {
      // The body is built once : the Content-Length is always the size of
      // what is really sent
      $body = $this->formDataEncode ();
      $this->headersSent[] = "Content-Type: application/x-www-form-urlencoded";
      $this->headersSent[] = "Content-Length: ".strlen ($body);
    }
    // }}}

    // Send the request to the server
    // {{{
    if ($this->tcpclient === null)
    {
      // Store the object only when the connection is established, to not
      // reuse a broken client on the next call
      $tcpclient = new \vendor\domframework\tcpclient ($parseURL["host"],
                                                       $this->port);
      $tcpclient->timeout ($this->timeout);
      $tcpclient->connect ();
      if ($scheme === "https")
        $tcpclient->cryptoEnable (true, null, $ssloptions);
      $this->tcpclient = $tcpclient;
    }
    $this->tcpclient->readMode ("text");
    foreach ($this->headersSent as $header)
      $this->tcpclient->send ("$header\r\n");
    $this->tcpclient->send ("\r\n");
    if ($body !== "")
      $this->tcpclient->send ($body);
    // }}}

    // Get the result header from the server
    // {{{
    $headers = array ();
    while (true)
    {
      $header = $this->tcpclient->read ();
      // The empty line is the end of the headers.
      // NOTE(review): false / null are supposed to be the end of stream of
      // tcpclient : confirm
      if ($header === "" || $header === false || $header === null)
        break;
      $pos = strpos ($header, ":");
      if (empty ($headers) || $pos === false)
      {
        // The first line is always the status line, even if the reason
        // phrase contains a colon
        $headers[] = $header;
        continue;
      }
      $key = trim (substr ($header, 0, $pos));
      $val = trim (substr ($header, $pos + 1));
      if (! key_exists ($key, $headers))
        $headers[$key] = $val;
      else
      {
        // Repeated header (Set-Cookie by example) : store all the values
        if (! is_array ($headers[$key]))
          $headers[$key] = array ($headers[$key]);
        $headers[$key][] = $val;
      }
    }
    if (! key_exists (0, $headers))
      throw new \Exception ("No HTTP code available from server", 500);
    $this->headersReceived = $headers;
    if (preg_match ('#^HTTP/(?P<HTTPVersion>\d\.\d) (?P<HTTPCode>\d+)'.
                    '(?: (?P<HTTPMessage>.*))?$#i',
                    trim ($headers[0]), $matches) === 1)
      $this->httpCode = intval ($matches["HTTPCode"]);

    // Add the received cookies to property
    foreach (self::headerValues ($headers, "Set-Cookie") as $cookie)
    {
      // The invalid cookies are silently dropped
      $this->cookieAdd ($parseURL["host"], $cookie);
    }

    // Look how the body will be sent by the server
    $encodings = self::headerValues ($headers, "Transfer-Encoding");
    $lengths = self::headerValues ($headers, "Content-Length");
    if ($this->method === "HEAD" || $this->httpCode === 204 ||
        $this->httpCode === 304 ||
        ($this->httpCode >= 100 && $this->httpCode < 200))
    {
      // Never a body in these answers, even if a Content-Length is provided
      $this->contentMethod = false;
    }
    elseif (! empty ($encodings) &&
            stripos (implode (",", $encodings), "chunked") !== false)
    {
      $this->contentMethod = "chunked";
      $this->bodySize = 0;
    }
    elseif (! empty ($lengths) && ctype_digit ($lengths[0]))
    {
      // A Content-Length of 0 is a valid empty body
      $this->contentMethod = "Content-Length";
      $this->bodySize = intval ($lengths[0]);
    }
    elseif ($this->httpCode < 300 || $this->httpCode >= 400)
      throw new \Exception ("No transfert content provided", 500);
    // }}}
  }
  // }}}

  /** Read max MAXSIZE bytes of the body
   * connect () must be called before.
   * @param integer $maxsize The maxsize to get in this read
   * @return string The content read. Empty string if all the body is received
   * or if there is no body
   * @throws \Exception if the URL is not connected (406)
   */
  public function read ($maxsize = 4096)
  // {{{
  {
    if ($this->tcpclient === null)
      throw new \Exception ("HTTPClient : can not read non connected URL", 406);
    $this->tcpclient->timeout ($this->timeout);
    if ($this->contentMethod === false)
    {
      // If the server will never send anything, code 204 by example, do not try
      // to get data
      return "";
    }
    if ($this->contentMethod === "chunked" && $this->bodySize === 0)
    {
      // Get the body chunk size
      $this->tcpclient->readMode ("text");
      $size = trim ((string) $this->tcpclient->read ());
      // Remove the optional chunk extensions ("1a;name=value")
      $pos = strpos ($size, ";");
      if ($pos !== false)
        $size = trim (substr ($size, 0, $pos));
      $valid = ctype_xdigit ($size);
      $this->bodySize = $valid ? intval (hexdec ($size)) : 0;
      if ($this->bodySize === 0)
      {
        if ($valid)
        {
          // Last chunk : consume the optional trailers and the final empty
          // line, to let the connection clean for the next request
          while (true)
          {
            $line = $this->tcpclient->read ();
            if ($line === false || $line === null || trim ($line) === "")
              break;
          }
        }
        // Nothing more to read : do not wait on the socket on the next call
        $this->contentMethod = false;
        return "";
      }
    }
    if ($this->bodySize === 0)
      return "";

    $toBeRead = min ($this->bodySize, $maxsize);
    $this->tcpclient->readMode ("binary");
    $content = $this->tcpclient->read ($toBeRead);
    $this->bodySize = $this->bodySize - strlen ((string) $content);
    if ($this->contentMethod === "chunked" && $this->bodySize === 0)
    {
      // Get the Carriage return before the next chunk size
      $this->tcpclient->readMode ("text");
      $this->tcpclient->read ();
    }
    return $content;
  }
  // }}}

  /** Disconnect the connection
   */
  public function disconnect ()
  // {{{
  {
    $this->tcpclient = null;
  }
  // }}}

  /** Encode the form data in application/x-www-form-urlencoded format
   * A value like "@/tmp/file" is replaced by the content of /tmp/file.
   * WARNING : never provide form data coming from an untrusted source, as
   * the "@" syntax allows to send any readable local file to the server.
   * @return string The encoded form data ("key1=val1&key2=val2")
   * @throws \Exception if a data file doesn't exist or can not be read (406)
   */
  private function formDataEncode ()
  // {{{
  {
    $pairs = array ();
    foreach ($this->formData as $key => $val)
    {
      if (is_string ($val) && isset ($val[0]) && $val[0] === "@")
      {
        $file = substr ($val, 1);
        if (! file_exists ($file))
          throw new \Exception ("Data file '$file' doesn't exists", 406);
        // TODO : Do a loop of 1MB for big files instead of loading the mem
        $val = file_get_contents ($file);
        if ($val === false)
          throw new \Exception ("Data file '$file' can not be read", 406);
      }
      $pairs[] = rawurlencode ((string) $key)."=".
                 rawurlencode ((string) $val);
    }
    return implode ("&", $pairs);
  }
  // }}}

  /** Return all the values of a header, without looking at the case of the
   * name (the header names are case insensitive in HTTP)
   * @param array $headers The headers, as stored in headersReceived
   * @param string $name The name of the header to look for
   * @return array The list of the values found (empty if not found)
   */
  private static function headerValues ($headers, $name)
  // {{{
  {
    $res = array ();
    foreach ($headers as $key => $val)
    {
      if (! is_string ($key) || strcasecmp ($key, $name) !== 0)
        continue;
      foreach ((array) $val as $one)
        $res[] = $one;
    }
    return $res;
  }
  // }}}

  /** Compute the absolute URL of a redirect from the Location header and the
   * actual URL. The "." and ".." segments are not simplified.
   * @param string $location The Location provided by the server
   * @return string The URL to request
   */
  private function redirectUrl ($location)
  // {{{
  {
    $location = trim ($location);
    $target = parse_url ($location);
    if (is_array ($target) && key_exists ("scheme", $target))
      return $location;
    // Relative location : $this->url was already validated by connect ()
    $base = parse_url ($this->url);
    if (substr ($location, 0, 2) === "//")
      return $base["scheme"].":".$location;
    $root = $base["scheme"]."://".$base["host"];
    if (key_exists ("port", $base))
      $root .= ":".$base["port"];
    $path = key_exists ("path", $base) ? $base["path"] : "/";
    if ($location === "")
      return $root.$path;
    if ($location[0] === "/")
      return $root.$location;
    if ($location[0] === "?")
      return $root.$path.$location;
    // Relative to the directory of the actual path
    $pos = strrpos ($path, "/");
    $dir = ($pos === false) ? "/" : substr ($path, 0, $pos + 1);
    return $root.$dir.$location;
  }
  // }}}

  //////////////////////////////////
  ////   COOKIES MANAGEMENT     ////
  //////////////////////////////////
  /** The cookies are stored in Netscape cookies.txt file :
  The layout of Netscape's cookies.txt file is such that each line contains
  one name-value pair. An example cookies.txt file may have an entry that
  looks like this:

  .netscape.com TRUE / FALSE 946684799 NETSCAPE_ID 100103

  Each line represents a single piece of stored information. A tab is inserted
  between each of the fields.

  From left-to-right, here is what each field represents:
  domain : The domain that created AND that can read the variable.
  flag : A TRUE/FALSE value indicating if all machines within a given
         domain can access the variable. This value is set automatically
         by the browser, depending on the value you set for domain.
  path : The path within the domain that the variable is valid for.
  secure : A TRUE/FALSE value indicating if a secure connection with the
           domain is needed to access the variable.
  expiration : The UNIX time that the variable will expire on. UNIX time is
               defined as the number of seconds since Jan 1, 1970 00:00:00
               GMT. 0 is used for the session cookies.
  name : The name of the variable.
  value : The value of the variable.
  */

  /** Add a cookie in the store
   * If the cookie already exists (same domain, path and name), the old one is
   * replaced by the new value. If the new cookie is already expired, the old
   * one is removed.
   * The first part of the cookie is the name=value pair, the other parts are
   * the attributes (Path, Domain, Expires, Max-Age, Secure), without looking
   * at the case.
   * @param string $domain The domain to use (the host which sent the cookie)
   * @param string $cookie The cookie content to store (the value of a
   * Set-Cookie header)
   * @return string|null A message if the cookie is refused, null otherwise
   */
  public function cookieAdd ($domain, $cookie)
  // {{{
  {
    $parts = explode (";", $cookie);
    $pair = explode ("=", array_shift ($parts), 2);
    $name = trim ($pair[0]);
    if (count ($pair) !== 2 || $name === "")
      return "Invalid cookie name provided";
    $value = trim ($pair[1]);
    $host = strtolower ($domain);
    $domain = $host;
    $flag = "FALSE";
    $path = "/";
    $secure = "FALSE";
    $expiration = 0;
    $maxAge = null;
    foreach ($parts as $part)
    {
      $attr = explode ("=", $part, 2);
      $key = strtolower (trim ($attr[0]));
      $val = isset ($attr[1]) ? trim ($attr[1]) : "";
      if ($key === "path")
      {
        // A path not starting by a slash is ignored
        if ($val !== "" && $val[0] === "/")
          $path = $val;
      }
      elseif ($key === "domain")
      {
        // The leading dot is allowed but ignored
        $val = strtolower (ltrim ($val, "."));
        if ($val === "")
          continue;
        // Check if the host is compatible with the domain before storing the
        // cookie
        if (! self::domainMatch ($host, $val))
          return "Invalid domain provided";
        $domain = $val;
        $flag = "TRUE";
      }
      elseif ($key === "expires")
      {
        if ($val === "")
          continue;
        try
        {
          $date = new \DateTime ($val);
          // 0 is reserved to the session cookies : a cookie expiring at
          // epoch (the usual way to remove a cookie) must stay expired
          $expiration = max (1, $date->getTimestamp ());
        }
        catch (\Exception $e)
        {
          return "Invalid expires date provided";
        }
      }
      elseif ($key === "max-age")
      {
        if (preg_match ('/^-?\d+$/', $val) === 1)
          $maxAge = intval ($val);
      }
      elseif ($key === "secure")
        $secure = "TRUE";
      // The other attributes are not managed
    }
    // Max-Age has precedence over Expires
    if ($maxAge !== null)
      $expiration = ($maxAge <= 0) ? 1 : time () + $maxAge;

    // A tab would corrupt the storage format
    if (strpos ($name.$value.$path, "\t") !== false)
      return "Invalid cookie name provided";
    $cookieLine = "$domain\t$flag\t$path\t$secure\t$expiration\t$name\t$value";
    if (strlen ($cookieLine) > 4096)
      return "Cookie value too long";
    if ($expiration === 0 && $this->cookiesSession === false)
    {
      // Do not store Session cookies
      return;
    }

    // The session cookies (expiration 0) never expire here
    $expired = $expiration !== 0 && $expiration < time ();
    $found = false;
    foreach ($this->cookies as $key => $storedCookie)
    {
      $stored = explode ("\t", $storedCookie);
      if (count ($stored) < 6)
        continue;
      if (strtolower (ltrim ($stored[0], ".")) !== $domain ||
          $stored[2] !== $path || $stored[5] !== $name)
        continue;
      if ($expired || $found)
      {
        // Remove the already set cookie : expired (or duplicated)
        unset ($this->cookies[$key]);
      }
      else
      {
        // Update the already set cookie
        $this->cookies[$key] = $cookieLine;
      }
      $found = true;
    }
    if ($found === false && ! $expired)
    {
      // Append the new cookie
      $this->cookies[] = $cookieLine;
    }
  }
  // }}}

  /** Check if some stored cookies must be send to the server, because they are
   * in the same domain, in the path, not expired and allowed on the scheme.
   * @param string $url The URL requested
   * @return array the cookies to add to the headers send to the server
   * (list of "name=value")
   */
  public function cookieToSend ($url)
  // {{{
  {
    $parseURL = parse_url ($url);
    if (! is_array ($parseURL) || ! key_exists ("host", $parseURL))
      return array ();
    $host = strtolower ($parseURL["host"]);
    $path = "/";
    if (key_exists ("path", $parseURL) && $parseURL["path"] !== "")
      $path = $parseURL["path"];
    if ($path[0] !== "/")
      $path = "/".$path;
    $https = key_exists ("scheme", $parseURL) &&
             strtolower ($parseURL["scheme"]) === "https";
    $now = time ();
    $res = array ();
    foreach ($this->cookies as $storedCookie)
    {
      $stored = explode ("\t", $storedCookie);
      if (count ($stored) < 7)
        continue;
      $cookieDomain = strtolower (ltrim ($stored[0], "."));
      // flag TRUE : all the hosts of the domain. Else only the exact host
      if ($stored[1] === "TRUE")
        $allowed = self::domainMatch ($host, $cookieDomain);
      else
        $allowed = $host === $cookieDomain;
      if (! $allowed || ! self::pathMatch ($path, $stored[2]))
        continue;
      // The secure cookies are never sent on a clear connection
      if ($stored[3] === "TRUE" && ! $https)
        continue;
      $expiration = intval ($stored[4]);
      if ($expiration !== 0 && $expiration < $now)
        continue;
      $res[] = $stored[5]."=".$stored[6];
    }
    return $res;
  }
  // }}}

  /** Check if the host is the domain or a sub-domain of the domain
   * "www.example.com" matches "example.com", "badexample.com" doesn't.
   * The IP addresses must be identical.
   * The public suffixes ("com" by example) are not detected.
   * @param string $host The host to check (lowercase)
   * @param string $domain The domain (lowercase, without leading dot)
   * @return boolean
   */
  private static function domainMatch ($host, $domain)
  // {{{
  {
    if ($host === $domain)
      return true;
    if ($domain === "" || filter_var ($host, FILTER_VALIDATE_IP) !== false)
      return false;
    $suffix = ".".$domain;
    return strlen ($host) > strlen ($suffix) &&
           substr ($host, -1 * strlen ($suffix)) === $suffix;
  }
  // }}}

  /** Check if the path requested is in the path of the cookie
   * "/app/page" and "/app" match "/app", "/application" doesn't.
   * @param string $requestPath The path of the URL requested
   * @param string $cookiePath The path stored in the cookie
   * @return boolean
   */
  private static function pathMatch ($requestPath, $cookiePath)
  // {{{
  {
    if ($cookiePath === "" || $cookiePath === $requestPath)
      return true;
    $len = strlen ($cookiePath);
    if (strncmp ($requestPath, $cookiePath, $len) !== 0)
      return false;
    return substr ($cookiePath, -1) === "/" ||
           substr ($requestPath, $len, 1) === "/";
  }
  // }}}
}