<?php
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301, USA.
 *
 * (c) 2019 HOT CAT QUERY FEVERS.
 */
# Previewer entry point.
# When the request carries a "url" query parameter, the named resource is
# fetched through curl() and re-emitted so that its links point back at this
# script. Feeds, CSS, HTML and everything else each get their own branch; every
# branch ends the request with exit().
error_reporting(-1);
ini_set('max_execution_time', 600);
header('x-content-type-options: nosniff');
if (filter_has_var(INPUT_GET, 'url'))
{
	$tor = false;
	# The target is taken from the raw request URI (everything after the first
	# "="), not from the parsed GET value, so the target's own query string is kept.
	$request_uri = getenv('REQUEST_URI');
	$exuri = explode('=', $request_uri);
	$segment = $exuri[0]. '=';
	$uri = substr_replace($request_uri, '', 0, strlen($segment));
	$uri = str_replace('+', '%2B', $uri);
	# Decode once when the value looks like it carries a query string, either
	# literally or as a double-encoded "%253F".
	$url = preg_match('/.*\?.*\=.*[\&.*]?/', $uri) || preg_match('/.*\%253F.*\=.*[\%26.*]?/', $uri) ? urldecode($uri) : $uri;
	$extension = strtolower(pathinfo(parse_url($url, PHP_URL_PATH), PATHINFO_EXTENSION));
	$host = parse_url($url, PHP_URL_HOST);
	$scheme = parse_url($url, PHP_URL_SCHEME);
	$path = parse_url($url, PHP_URL_PATH);
	$query = parse_url($url, PHP_URL_QUERY);
	# Prefix that turns an absolute target URL into a link through this script.
	$server_name = 'http://'. getenv('SERVER_NAME'). $segment;
	$filename = basename($path);
	# XPath for ad containers; only referenced by the commented-out "#ad" step below.
	$ads = '//html//*
	[
		contains(@href, "impact-ad") or
		starts-with(@class, "mdCmmn") or
		starts-with(@class|@id, "yjads")
	]';
	$xmls = ['atom', 'rdf', 'rss', 'xml'];
	# Per-host cookie jar used by curl().
	$cookie = sys_get_temp_dir(). '/'. $host;
	# A request body (rewritten forms are always submitted as POST, see "#action")
	# is turned back into a query string on the target URL via a redirect.
	# Stop here: the redirected request does the actual fetch.
	if ($post = file_get_contents('php://input'))
	{
		header('Location: '. $server_name. strtok($url, '?'). '?'. $post);
		exit();
	}
	$curl = curl($url);
	if (!$curl && $response_code === 0) exit(http_response_code(503));
	# Cloudflare JavaScript challenge page: solve it and fetch the answer URL,
	# unless a cookie jar for this host already exists.
	if (strpos($curl, 'cdn-cgi/l/chk_jschl') !== false)
	{
		if (!is_file($cookie) || basename($cookie) !== $host)
			$curl = curl(CloudflareBypass($curl, $url), $url);
	}
	# Charset declared in a <meta> or <?xml ...> near the start of the body,
	# otherwise a detection list for mb_convert_encoding().
	$encoding = preg_match('/<(meta|\?xml) .*?(charset=|encoding=)["\']?([\w\-]+)["\']?/i', mb_substr($curl, 20, 1000), $match) ? mb_preferred_mime_name($match[3]) : 'ASCII,UTF-8,SJIS,JIS,EUC-JP';
	if (strpos($mime, 'x-empty') !== false) exit();
#feed (RSS/Atom/RDF/XML)
	elseif ((stripos($curl, '<rss') !== false && stripos($curl, '<html') === false) || array_search($extension, $xmls) !== false)
	{
		# $hd is only populated by curl()'s header callback; it may be unset.
		foreach($hd ?? [] as $phd) header($phd);
		if (!$curl = @mb_convert_encoding($curl, 'HTML-ENTITIES', $encoding)) $curl = mb_convert_encoding($curl, 'HTML-ENTITIES', 'auto');
		$curl = preg_replace('/(<copyright[^>]*>)(.*?)(<\/copyright[^>]*>)/is', '', $curl);
		$curl = preg_replace('/(<itunes:[^>]+>.*?<\/itunes:[^>]+>)/is', '', $curl);
		# Route about/href/src attribute URLs through the proxy.
		$curl = preg_replace('/(about|href|src)=["\']([\w\/:%#\$&\?\(\)~\.=\+\-]+)["\']?/', '$1="'. $server_name. '$2"', $curl);
		$curl = preg_replace('/<\/itunes:[^>]+>/i', '', $curl);
		$curl = preg_replace('/<itunes:.* \/>/i', '', $curl);
		# Text elements are wrapped in CDATA, link-like elements are proxied.
		$curl = preg_replace_callback('/(<content[^>]*>)(.*?)(<\/content[^>]*>)/is', 'cdata', $curl);
		$curl = preg_replace_callback('/(<description[^>]*>)(.*?)(<\/description[^>]*>)/is', 'cdata', $curl);
		$curl = preg_replace_callback('/(<guid[^>]*>)(.*?)(<\/guid[^>]*>)/is', 'xlink', $curl);
		$curl = preg_replace_callback('/(<id[^>]*>)(.*?)(<\/id[^>]*>)/is', 'xlink', $curl);
		$curl = preg_replace_callback('/(<link[^>]*>)(.*?)(<\/link[^>]*>)/is', 'xlink', $curl);
		$curl = preg_replace_callback('/(<title[^>]*>)(.*?)(<\/title[^>]*>)/is', 'cdata', $curl);
		exit(html_entity_decode($curl));
	}
#anything that is not HTML
	elseif (strpos($mime, 'html') === false)
	{
#css
		if (strpos($mime, 'css') !== false)
		{
			# An explicit @charset wins over the guessed encoding and is removed.
			if (preg_match('/@charset[\s\S]?["\'](.*?)["\'];/', $curl, $charset))
			{
				$encoding = $charset[1];
				$curl = str_replace($charset[0], '', $curl);
			}
			$curl = @mb_convert_encoding($curl, 'HTML-ENTITIES', $encoding);

			# Strip comments so they cannot confuse the url()/@import rewriting.
			if (strpos($curl, '/*') !== false) $curl = preg_replace('/\/\*.*?\*\//is', '', $curl);
#bg svg
			# Empty every rule body that mentions background, svg and repeat together.
			if (preg_match_all('/\{(.*?)\}/s', $curl, $brackets))
			{
				foreach($brackets[1] as $css)
				{
					if (strpos($css, 'background') !== false && strpos($css, 'svg') !== false && strpos($css, 'repeat') !== false)
						$curl = str_replace($css, '', $curl);
				}
			}
			if (preg_match('/(@import ?["\'])(.*?)(["\'])/is', $curl))
				$curl = preg_replace_callback('/(@import ?["\'])(.*?)(["\'])/is', 'export_url', $curl);

			# Match the literal token "url(" (case-insensitively). The former
			# character class [url|URL] matched any single one of those letters,
			# so hsl(), var(), attr(), blur() ... were rewritten as URLs too.
			if (preg_match('/(url ?\(["\']?)(.*?)(["\']?\))/i', $curl))
				$curl = preg_replace_callback('/(url ?\(["\']?)(.*?)(["\']?\))/i', 'export_url', $curl);
#cn font
			if (strpos($curl, '&#65419;&#65422;&#65420;&#34537') !== false)
				$curl = str_replace('&#65419;&#65422;&#65420;&#34537', 'SimSun', $curl);

			exit(html_entity_decode($curl));
		}
#binary / other: pass through with the captured headers and a file name
		else
		{
			$hd[] = 'Content-Disposition: filename="'. $filename. '"';
			rsort($hd);
			foreach($hd as $phd) header($phd);
			exit($curl);
		}
	}
#html
	else
	{
		$curl = @mb_convert_encoding($curl, 'HTML-ENTITIES', $encoding);
		$dom = new DOMDocument();
		$dom->formatOutput = true;
		# Real-world markup is rarely valid; keep libxml warnings out of the output.
		libxml_use_internal_errors(true);
#script
#		if (stripos($curl, '<script') !== false) $curl = preg_replace('/(<script[^>]*>.*?<\/script>)/is', '', $curl);

#space
#		$curl = preg_replace('/(?s)<(pre|code)[^>]*>.*?<\/(pre|code)>(*SKIP)(*F)|\s\s+</', '<', $curl);

		if ($curl) $dom->loadHTML($curl);
		$xpath = new DOMXPath($dom);
#comment
#		foreach($xpath->query('//comment()') as $comment) if ($comment) $comment->parentNode->removeChild($comment);

#html
		# Default the document language to Japanese when none is declared.
		if ($xpath->query('//html')->length > 0 && !$xpath->query('//html')->item(0)->hasAttribute('lang'))
			$xpath->query('//html')->item(0)->setAttribute('lang', 'ja');
#base
		# A <base href> replaces the URL that relative links are resolved against.
		if ($xpath->query('//base')->length > 0 && $base = $xpath->query('//base')->item(0)->getAttribute('href'))
			$url = $base[0] === '/' ? dirname($url) : $base;
#title
		if (isset($xpath->query('//title')->item(0)->textContent))
			$xpath->query('//title')->item(0)->textContent .= ' - Previewer by HCQF';
#style
		# Inline stylesheets: drop comments, then proxy url() and @import targets.
		foreach($xpath->query('//style') as $style)
		{
			if ($style)
			{
				if (strpos($style->nodeValue, '/*') !== false)
					$style->nodeValue = preg_replace('/\/\*.*?\*\//s', '', $style->nodeValue);
				if (strpos($style->nodeValue, 'url') !== false)
					$style->nodeValue = preg_replace_callback('/([\s|\S]?url[\s|\S]?\(["\']?)(.*?)(["\']?\))/is', 'export_url', $style->nodeValue);
				if (preg_match('/(@import[\s\S]?["\'])(.*?)(["\'])/is', $style->nodeValue))
					$style->nodeValue = preg_replace_callback('/(@import[\s\S]?["\'])(.*?)(["\'])/is', 'export_url', $style->nodeValue);
			}
		}
#svg
		# Give unsized SVGs a default width.
		foreach($xpath->query('//svg') as $svg)
			if ($svg && !$svg->hasAttribute('width')) $svg->setAttribute('width', '1em');
#noscript
#		foreach($xpath->query('//noscript') as $noscript)
#			if ($noscript && strpos($host, 'wikipedia') === false) $noscript->parentNode->removeChild($noscript);

#option
#		foreach($xpath->query('//option') as $opt)
#			if ($opt && $val = $opt->getAttribute('value')) $opt->setAttribute('value', path2uri($val, $url));

#query
		# Trim paging/search parameters from the request's query string before it
		# is appended to "search.php" links in the "#href" step.
		if (isset($query))
		{
			if (strpos($query, 'start') !== false)
				$query = preg_replace('/search_id=\d+&start=\d+&/', '', $query);
			if (strpos($query, 'relevance') !== false)
				$query = preg_replace('/page=\d+&q=\w+&o=relevance&/', '', $query);
		}
#input type text
		foreach($xpath->query('//input') as $input)
			if ($input->getAttribute('type') === 'text' || $input->getAttribute('type') === 'search' || $input->getAttribute('value') === '') $input->setAttribute('style', 'height: inherit;');
#*
		# One pass over every element to rewrite all URL-carrying attributes.
		foreach($xpath->query('//html//*') as $all)
		{
#on*
#			if ($all->hasAttribute('onclick')) $all->removeAttribute('onclick');

			# Collapse runs of whitespace inside class lists.
			if ($all->hasAttribute('class'))
			{
				if (preg_match('/\s+/', $all->getAttribute('class'))) $all->setAttribute('class', preg_replace('/\s+/', ' ', trim($all->getAttribute('class'))));
			}
#style
			if ($all->hasAttribute('style'))
			{
				if ($bg = preg_replace_callback('/([\s|\S]?url[\s|\S]?\(["\']?)(.*?)(["\']?\))/is', 'export_url', $all->getAttribute('style'))) $all->setAttribute('style', $bg);
			}
#href
			# Already percent-encoded hrefs are used as-is, others are encoded by r().
			if ($href = strpos($all->getAttribute('href'), '%') !== false ? $all->getAttribute('href') : r($all->getAttribute('href')))
			{
				# A percent-encoded (internationalised) host name is converted to its ASCII form.
				if (preg_match('|://%\w+[^/]*/|', $href))
					$href = preg_replace_callback('|(://)(%\w+[^/]*)(/)|', function($m){return $m[1]. idn_to_ascii(urldecode($m[2]), IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46). $m[3];}, $href);
				if (!$all->hasAttribute('rel') && isset($query) && strpos($href, 'search.php') !== false)
					$all->setAttribute('href', path2uri($href. '&'. $query, $url));
				else
					$all->setAttribute('href', path2uri($href, $url));
			}
#src
			# Lazy-loading attributes are promoted to a real, proxied src.
			if ($all->hasAttribute('data-lazy'))
			{
				$src = $all->getAttribute('data-lazy');
				$all->removeAttribute('data-lazy');
			}
			elseif ($all->hasAttribute('data-src'))
			{
				$src = $all->getAttribute('data-src');
				$all->removeAttribute('data-src');
			}
			elseif ($all->hasAttribute('file'))
			{
				$src = $all->getAttribute('file');
				$all->removeAttribute('file');
			}
			else
				$src = $all->getAttribute('src');
			if ($src) $all->setAttribute('src', path2uri($src, $url));
#action
			# Forms are proxied and forced to POST; the request-body handling near
			# the top of this script turns the submitted body back into a query string.
			if ($all->hasAttribute('action') || $all->hasAttribute('method'))
			{
				$action = $all->getAttribute('action');
				$act = path2uri($action, $url);
				if ($action) $all->setAttribute('action', $act);
				$all->setAttribute('method', 'post');
			}
#background
			if ($all->hasAttribute('background')) $all->setAttribute('background', path2uri($all->getAttribute('background'), $url));
#poster
			if ($all->hasAttribute('poster')) $all->setAttribute('poster', path2uri($all->getAttribute('poster'), $url));
		}
#ad
#		foreach($xpath->query($ads) as $ad) if ($ad) $ad->parentNode->removeChild($ad);

#div lazy
		# A <div> carrying a src attribute gets that image as its background.
		foreach($xpath->query('//div') as $div)
			if ($div->hasAttribute('src')) $div->setAttribute('style', 'background-image: url('. $div->getAttribute('src'). ');');

		exit($dom->saveHTML($dom->documentElement));
	}
}
# Percent-encodes $path with rawurlencode() and then puts the URL delimiters
# "#", "/", ":", "?" and "=" back in their literal form.
function r($path)
{
	$delimiters = array(
		'%23' => '#',
		'%2F' => '/',
		'%3A' => ':',
		'%3F' => '?',
		'%3D' => '=',
	);
	$encoded = rawurlencode($path);

	return strtr($encoded, $delimiters);
}
# Fetches $url with cURL on behalf of the visitor and returns the response body.
#
# $url      target URL (http or https only)
# $referer  Referer to send; a non-empty value also enables the per-host
#           cookie jar and suppresses the redirect-to-effective-URL step
#
# Returns whatever curl_exec() produced: the body string, or false on failure.
# Side effects: appends Content-Type / "name"-bearing response headers to
# $GLOBALS['hd'], sets $GLOBALS['mime'] and $GLOBALS['response_code'], and, when
# the transfer ended on a different URL, may send a Location header and exit.
# Reads the globals $host, $post, $cookie, $tor, $scheme and $server_name.
function curl($url, $referer='')
{
	global $host, $post, $cookie, $tor;
	# Forward the visitor's own request headers to the target.
	$headers = [
		'Accept-Language: '. getenv('HTTP_ACCEPT_LANGUAGE'),
		'Accept: '. getenv('HTTP_ACCEPT'),
		'Cache-Control: '. getenv('HTTP_CACHE_CONTROL'),
		'Connection: '. getenv('HTTP_CONNECTION'),
		'DNT: '. getenv('HTTP_DNT'),
		'Host: '. $host
	];
	$ch = curl_init();
	curl_setopt($ch, CURLINFO_HEADER_OUT, true);
	curl_setopt($ch, CURLOPT_AUTOREFERER, true);
	curl_setopt($ch, CURLOPT_ENCODING, getenv('HTTP_ACCEPT_ENCODING'));
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
	curl_setopt($ch, CURLOPT_FORBID_REUSE, true);
	curl_setopt($ch, CURLOPT_HEADER, false);
	curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
	curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
	# $url comes straight from the request. Without this restriction libcurl
	# would also serve file://, gopher://, dict:// and friends, exposing local
	# files and internal services.
	curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
	curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($ch, CURLOPT_SSLVERSION, CURL_SSLVERSION_DEFAULT);
	curl_setopt($ch, CURLOPT_TIMEOUT, 600);
	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_USERAGENT, getenv('HTTP_USER_AGENT'));
	if ($referer || is_file($cookie) && basename($cookie) === $host)
	{
		# Load and persist cookies through the per-host jar. CURLOPT_COOKIE is
		# deliberately not set: it takes the literal Cookie header value, so
		# passing the jar's file name there sent the path itself as a cookie.
		curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
		curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
		curl_setopt($ch, CURLOPT_REFERER, $referer);
	}
	else
	{
		global $scheme;
		curl_setopt($ch, CURLOPT_REFERER, $scheme. '://'. $host);
	}
	# Remember the response headers the caller replays to the visitor.
	curl_setopt($ch, CURLOPT_HEADERFUNCTION, function($ch, $hd)
	{
		if (stripos($hd, 'Content-Type') !== false || stripos($hd, 'name') !== false) $GLOBALS['hd'][] = $hd;
		return strlen($hd);
	});
	if ($tor)
	{
		curl_setopt($ch, CURLOPT_PROXY, '127.0.0.1');
		curl_setopt($ch, CURLOPT_PROXYPORT, '9050');
		curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
	}
	# NOTE(review): security trade-off kept from the original - when the first
	# attempt yields nothing (failure or empty body) the request is retried
	# with TLS peer verification disabled, then once more forcing IPv4.
	if (!$cx = curl_exec($ch))
	{
		curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
		$cx = curl_exec($ch);
	}
	if (!$cx)
	{
		curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
		$cx = curl_exec($ch);
	}
	$effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
	$GLOBALS['mime'] = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
	$GLOBALS['response_code'] = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
	curl_close($ch);
	# If the target redirected, send the visitor to the proxied final URL so
	# that relative links in the page resolve against the right location.
	if (!$referer && !$post && $url != $effective_url)
	{
		global $server_name;
		header('Location: '. $server_name. $effective_url);
		exit();
	}
	return $cx;
}
# Turns a link found in a fetched document into a link back through this
# script: $server_name followed by the absolute target URL.
#
# $path  raw attribute/CSS value (absolute, scheme-relative, root-relative,
#        query-only or relative)
# $url   URL of the document the link was found in
#
# Fragment-only links and data:, javascript:, mailto: and tel: values are
# returned unchanged. Reads the globals $server_name and $scheme.
function path2uri($path, $url)
{
	global $server_name, $scheme;

	if (substr($path, 0, 1) === '#' || substr($path, 0, 5) === 'data:' || substr($path, 0, 11) === 'javascript:' || substr($path, 0, 7) === 'mailto:' || substr($path, 0, 4) === 'tel:')
		return $path;

	# Query-only link: keep the document's URL, swap its query string.
	if (substr($path, 0, 1) === '?')
		return $server_name. strtok($url, '?'). $path;

	# Already absolute. Require the "http:" / "https:" scheme rather than any
	# "http" prefix, so relative names such as "http-status.html" still get
	# resolved against the document.
	if (preg_match('#^https?:#i', $path))
		return $server_name. $path;

	# Scheme-relative link inherits the scheme of the original request.
	if (substr($path, 0, 2) === '//' && isset($scheme))
		return $server_name. $scheme. ':'. $path;

	# Numeric "page" links next to a numeric last URL segment replace that segment.
	if (is_numeric($path) && is_numeric(basename($url)))
		return $server_name. dirname($url). '/'. basename($path);

	if (substr($path, 0, 1) !== '.' && substr($path, 0, 1) !== '/' && is_numeric(basename($url)))
		return $server_name. dirname($url). '/'. $path;

	$exurl = explode('/', $url);

	# No host part in $url, or an explicit "current directory" link.
	if (!isset($exurl[2]) || $path === './')
		return $server_name. dirname($url);

	$fqnd = $scheme. '://'. $exurl[2];

	if (!$exurl = parse_url($url))
		return $server_name. $url;

	# Directory of the document: the path itself when it ends in "/", else its parent.
	$xpath = isset($exurl['path']) ? $exurl['path'] : '/';
	$xpath = substr($xpath, -1) !== '/' ? dirname($xpath) : $xpath;

	if (strpos($path, '?') === 0)
		return $server_name. $fqnd. $xpath. $path;

	# Root-relative link.
	if (strpos($path, '/') === 0)
		return $server_name. $fqnd. $path;

	# Relative link: walk its segments on top of the document's directory.
	$xpathy = array_filter(explode('/', $xpath), 'strlen');
	$endpath = end($xpathy);

	if (strpos($endpath, '.') !== false) array_pop($xpathy);

	foreach(explode('/', $path) as $expath)
	{
		if ($expath === '.') continue;
		if ($expath === '..')
		{
			# Always consume "..": the popped value must not be used as the
			# success flag (a segment named "0" is falsy), and a ".." above
			# the root is dropped instead of being emitted literally.
			array_pop($xpathy);
			continue;
		}
		if ($expath !== '') $xpathy[] = $expath;
	}
	$uri = $fqnd. '/'. implode('/', $xpathy);

	if (substr($path, -1) === '/') $uri .= '/';

	return $server_name. $uri;
}
# preg_replace_callback() handler for feed text elements.
# $m holds the whole match, the opening tag, the inner text and the closing tag.
# Content that already mentions CDATA is returned as matched; otherwise the
# inner text has its line breaks removed, its entities decoded, and is wrapped
# in a CDATA section between the original tags.
function cdata($m)
{
	$already_wrapped = strpos($m[0], 'CDATA') !== false;
	if ($already_wrapped) return $m[0];

	$single_line = str_replace(PHP_EOL, '', $m[2]);
	$decoded = html_entity_decode($single_line);

	return $m[1]. '<![CDATA['. $decoded. ']]>'. $m[3];
}
# preg_replace_callback() handler for feed elements whose text is a URL.
# $m holds the whole match, the opening tag, the inner text and the closing tag.
# The trimmed inner text is prefixed with the global $server_name and every
# "&amp;" in it becomes "%26". Returns null when there is no opening-tag group.
function xlink($m)
{
	global $server_name;

	if (!isset($m[1])) return null;

	$target = trim($m[2]);
	$target = str_replace('&amp;', '%26', $target);

	return $m[1]. $server_name. $target. $m[3];
}
# preg_replace_callback() handler for CSS url(...) and @import targets.
# $m holds the whole match, the text before the target, the target itself and
# the text after it. Quotes and surrounding whitespace are stripped from the
# target; a scheme-relative target gets the global $scheme; targets containing
# "data:" are kept, all others go through path2uri() against the global $url.
# Returns null when the cleaned target is empty (or "0").
function export_url($m)
{
	global $scheme, $url;

	$target = str_replace(array("'", '"'), '', trim($m[2]));
	if (!$target) return null;

	if (isset($scheme) && substr($target, 0, 2) === '//')
		$target = $scheme. ':'. $target;

	$is_inline_data = strpos($target, 'data:') !== false;
	$rewritten = $is_inline_data ? $target : path2uri($target, $url);

	return $m[1]. $rewritten. $m[3];
}
# Builds the answer URL for a Cloudflare JavaScript challenge page.
#
# $iuam  HTML of the challenge page
# $url   URL that produced the challenge; its scheme, host and query are reused
#
# Returns "<scheme>://<host>/cdn-cgi/l/chk_jschl?..." carrying the s, jschl_vc,
# pass and jschl_answer parameters merged with $url's own query parameters.
# Returns null (implicitly) when the challenge definition is not found in $iuam.
#
# NOTE(review): security - the arithmetic extracted from the remote page is
# run through eval(). A hostile or changed page can execute arbitrary PHP here.
function CloudflareBypass($iuam, $url)
{
	#MIT License
	#Copyright (c) KyranRana
	#https://github.com/KyranRana/cloudflare-bypass/
	# Locate the challenge object definition: <var1>={"<var2>":<initial expression>}
	if (preg_match('/(?<=s,t,o,p,b,r,e,a,k,i,n,g,f,\s)(\w+)={"(\w+)":(.+?)(?=})/', $iuam, $iuam_jschl_def_matches))
	{
		if (list($_, $var1, $var2, $code) = $iuam_jschl_def_matches)
		{
			# Collect every follow-up statement that updates <var1>.<var2> and
			# rewrite the whole sequence as PHP operating on $jschl_answer.
			preg_match_all('/' . $var1 . '\.' . $var2 . '[+\-*\/]?=.+?;/', $iuam, $iuam_jschl_matches);
			$iuam_jschl = '';
			$iuam_jschl .= "\$jschl_answer=$code;\n";
			foreach ($iuam_jschl_matches[0] as $jschl_match)
				$iuam_jschl .= str_replace("$var1.$var2", '$jschl_answer', $jschl_match) . "\n";

			# Translate the obfuscated JavaScript tokens: "]+[]" becomes a string
			# concatenation marker, "![]" and "+[]" become 0.
			$iuam_jschl = str_replace(']+[]', '].""', $iuam_jschl);
			$iuam_jschl = str_replace(array('![]', '+[]'), 0, $iuam_jschl);

			# Repeatedly evaluate the innermost parenthesised groups and splice
			# their values back in until no parentheses remain.
			while(preg_match_all('/\([^()]+\)/', $iuam_jschl, $iuam_jschl_eq_matches))
			{
				foreach ($iuam_jschl_eq_matches[0] as $eq_match)
				{
					if (strpos($eq_match, '.""') !== false)
					{
						# Group containing concatenation markers: evaluate each
						# non-empty part and join the results as a quoted string.
						$eq_answer = '"'. implode('',
						array_map(function($match){return eval('return '. str_replace(array('(', ')'), '', $match). ';');},
						array_filter(explode('.""', $eq_match), function($elem){return trim(str_replace(array('(', ')'), '', $elem)) !== '';}))). '"';
						$iuam_jschl = str_replace($eq_match, $eq_answer, $iuam_jschl);
					}
					else
					{
						# A group that already contains a quoted string is joined
						# with "." (quoting the bare operands); otherwise it is
						# evaluated as written.
						if (strpos($eq_match, '"') !== false)
							$eq_answer = implode('.', array_map(function($match){return strpos($match, '"') !== false ? $match : '"'. $match. '"';}, explode('+', $eq_match)));
						else
							$eq_answer = $eq_match;
						$eq_answer = eval('return '. str_replace(array('(', ')'), '', $eq_answer). ';');
						$iuam_jschl = str_replace($eq_match, $eq_answer, $iuam_jschl);
					}
				}
			}
			# Run the flattened statements; this defines $jschl_answer.
			eval($iuam_jschl);
			# Hidden form fields that must be echoed back with the answer.
			preg_match('/name="r" value="([^"]*)"/', $iuam, $matches);
			$s = $matches[1] ?? null;
			preg_match('/name="jschl_vc" value="([^"]+)"/', $iuam, $matches);
			$jschl_vc = $matches[1] ?? null;
			preg_match('/name="pass" value="([^"]+)"/', $iuam, $matches);
			$pass = $matches[1] ?? null;
			$uri = parse_url($url);
			$query = [];
			if (isset($uri['query'])) parse_str($uri['query'], $query);
			# The final answer is the computed value plus the length of the host name.
			$jschl_answer = round($jschl_answer, 10) + mb_strlen($uri['host']);
			# NOTE(review): presumably the challenge rejects answers submitted too
			# quickly - confirm before changing the delay.
			sleep(4);
			return sprintf("%s://%s/cdn-cgi/l/chk_jschl?%s", $uri['scheme'], $uri['host'], http_build_query(array_merge(['s' => $s,'jschl_vc' => $jschl_vc, 'pass' => $pass, 'jschl_answer' => $jschl_answer], $query)));
		}
	}
}