<?php
	require("../__common__/__define_common.php");
	require("../__common__/include-common-all.php");
	require("../__common__/include-common-html.php");

	// Form submission handler: download the page at the posted URL, convert every
	// qualifying <table> into CSV and send the result as a file download.
	// If nothing was posted, or no table produced any CSV, execution falls through
	// to the HTML form that follows this PHP section.
	$url = (isset($_POST['url']) && is_string($_POST['url'])) ? $_POST['url'] : '';
	if (isset($_POST['gethtml']) and $url <> '') {
		// Only plain http:// URLs are accepted.
		// NOTE(review): this fetches an arbitrary user-supplied URL from the server
		// (SSRF exposure) - restrict access if the tool is reachable by untrusted users.
		if (left($url,7) <> 'http://') {
			error_exit('http://以外は取得できません。');
		}

		// Conversion options are read from cookies (the form's JavaScript writes them).
		// Missing or non-string cookies fall back to '' so that no "undefined index"
		// warning can be emitted ahead of the download headers.
		$opt = array();
		foreach (array('minsize', 'addstr', 'tag_strip', 'sjis', 'header') as $opt_key) {
			$opt_cookie = 'html_table2csv_'.$opt_key;
			$opt[$opt_key] = (isset($_COOKIE[$opt_cookie]) && is_string($_COOKIE[$opt_cookie])) ? $_COOKIE[$opt_cookie] : '';
		}
		// intval() maps empty / non-numeric input to 0, which means "no minimum size".
		$minsize = intval($opt['minsize']);

		if (HTTP_PROXY_HOST.'' <> '') {
			// Route the request through the configured HTTP proxy.
			$proxy = array(
				'http' => array(
					'proxy' => 'tcp://'.HTTP_PROXY_HOST.':'.HTTP_PROXY_PORT,
					'request_fulluri' => true,
				),
			);
			$proxy_context = stream_context_create($proxy);
			$html = @file_get_contents($url, false, $proxy_context);
		} else 	{
			$html = @file_get_contents($url);
		}
		if ($html) {
			$html = mb_convert_encoding($html, 'UTF-8', MB_CONVERT_ENCODING_AUTO);
			$csv = '';
			$pos = 0;
			// presumably get_intag_span() advances $pos (by reference) past each table it returns - confirm in the helper
			while (($table = get_intag_span($html, '<table', '</table>', $pos)) <> '') {
				if (mb_strlen($table) >= $minsize) {
					$csv .= table_tag2csv($table, $opt['addstr']);
				}
			}
			if ($csv <> '') {
				// Download name: host plus up to three leading path segments (query string dropped), joined by '_'.
				$ary = explode('?', mb_substr($url,7));
				$ary = explode('/', $ary[0]);
				$filename = implode('_', array_slice($ary, 0, 4));
				// The name is placed inside a quoted Content-Disposition parameter, so neutralise
				// the characters that could terminate or corrupt it (quote, backslash, control chars).
				$filename = preg_replace('/[\x00-\x1f\x7f"\\\\]/', '_', $filename);

				$csv = str_ireplace('<br>"', '"', $csv);	// drop a <br> that ends a cell
				$from = array('&nbsp;', '&#x0009;', '<br>');
				$to = array(' ', "\t", "\n");
				$csv = str_ireplace($from, $to, $csv);
				if ($opt['tag_strip'] === 'on') {
					$csv = strip_tags($csv);
				}
				// Every cell is already wrapped in double quotes at this point, so a quote that is
				// still entity-encoded must be decoded to a doubled quote to keep the CSV well-formed.
				$csv = str_replace('&quot;', '""', $csv);
				$csv = htmlspecialchars_decode($csv);

				$header_row = $opt['header'];
				if ($opt['sjis'] === 'on') {
					// Convert the header row with the same helper as the body so the file
					// does not mix encodings (myfile_ENCODE is presumably the Shift-JIS conversion).
					$csv = myfile_ENCODE($csv);
					if ($header_row <> '') {
						$header_row = myfile_ENCODE($header_row);
					}
				}
				header('Cache-Control: private, max-age=10800, pre-check=10800');
				header('Content-Disposition: attachment; filename="'.$filename.'.csv"');
				header('Pragma: no-cache');
				header('Content-Type: text/comma-separated-values');
				// Content-Length is deliberately not sent: the original author observed that a
				// strlen($csv) based value lost data at the end of the download (that count did
				// not include the optional header row echoed below).
				if ($header_row <> '') {
					echo $header_row."\n";
				}
				echo $csv;
				exit();
			}
		} else {
			error_exit('取得できませんでした。');
		}
	}
?>
<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="utf-8">
<meta name="author" content="ok.2nd">
<link rel="stylesheet" href="<?= _STYLE_SHEET_FOLDER ?>/tools_common.css?20120406">
<title>HTML TABLE to CSV</title>
<style>
body {
	background-color: #ffffff;
	margin: 10px;
	padding: 0px;
}
#title{
	font-size: 120%;
	font-weight: bold;
}
</style>
<script src="../scripts/jquery.js"></script>
<script src="../scripts/jquery.cookie.js"></script>
<script src="../scripts/ok2nd.js"></script>
<script>
// Submit handler: copies the current value of each text field into its cookie,
// then returns true so the form submission proceeds.
function cookie_set(form) {
	var cookieFields = [
		["html_table2csv_url", "#url"],
		["html_table2csv_minsize", "#minsize"],
		["html_table2csv_header", "#csv_header"],
		["html_table2csv_addstr", "#csv_addstr"]
	];
	for (var i = 0; i < cookieFields.length; i++) {
		var cookieName = cookieFields[i][0];
		var fieldSelector = cookieFields[i][1];
		chgCookieVal(cookieName, $(fieldSelector).val());
	}
	return true;
}
</script>
</head>
<body>
<p><?php
	page_header_return_index();
?>
<span id="tool_title">HTML TABLE to CSV</span>
</p>
<?php
	// Cookie values are client-controlled: default missing / non-string ones to ''
	// (no "undefined index" warnings inside the markup) and HTML-escape them before
	// they are echoed into attribute values below.
	$form_cookie = array();
	foreach (array('url', 'minsize', 'header', 'addstr', 'tag_strip', 'sjis') as $form_key) {
		$form_cookie_name = 'html_table2csv_'.$form_key;
		$form_cookie[$form_key] = (isset($_COOKIE[$form_cookie_name]) && is_string($_COOKIE[$form_cookie_name])) ? $_COOKIE[$form_cookie_name] : '';
	}
?>
<form method="POST" action="<?= htmlspecialchars($_SERVER['SCRIPT_NAME'], ENT_QUOTES, 'UTF-8') ?>" onSubmit="return cookie_set(this);">
URL：<input class="text" type="text" id="url" name="url" value="<?= htmlspecialchars($form_cookie['url'], ENT_QUOTES, 'UTF-8') ?>" size=80>
<p style="margin:5px 0 5px 20px;color:#8b4513;">
抽出テーブルサイズ：<input class="text" type="text" id="minsize" name="minsize" size=8 value="<?= htmlspecialchars($form_cookie['minsize'], ENT_QUOTES, 'UTF-8') ?>">文字以上<br>
CSVヘッダー行：<input class="text" type="text" id="csv_header" name="header" size=100 value="<?= htmlspecialchars($form_cookie['header'], ENT_QUOTES, 'UTF-8') ?>"><br>
CSV行先頭挿入文字列：<input class="text" type="text" id="csv_addstr" name="addstr" size=16 value="<?= htmlspecialchars($form_cookie['addstr'], ENT_QUOTES, 'UTF-8') ?>"><br>
<label><input id="tag_strip" type="checkbox" value="on" onClick="chgCookieCheckOnOff(this, 'html_table2csv_tag_strip')"<?= $form_cookie['tag_strip'] === 'on' ? ' checked' : '' ?>>&lt;td&gt;内のHTMLタグ除去</label>
<label style="margin-left:10px;"><input id="sjis" type="checkbox" value="on" onClick="chgCookieCheckOnOff(this, 'html_table2csv_sjis')"<?= $form_cookie['sjis'] === 'on' ? ' checked' : '' ?>>CSVファイル→Shift-JIS変換</label>
</p>
<p style="margin-left:20px;">
<input type="submit" name="gethtml" value="TABLEデータ抽出＆CSVダウンロード">
<input type="reset" name="reset" value="リセット">
</p>
<p style="margin-top:10px;">
※ 「Shift-JIS変換」を指定しない場合、CSVファイルの文字コードはUTF-8になります。<br>
※ 以下の文字コードは変換します。<br>
</p>
<p style="margin:5px 0 0 20px;">
「&amp;nbsp;」　→　スペース<br>「&amp;#x0009;」　→　タブ<br>「&lt;br&gt;」　→　改行(LF)
</p>
</form>
<div id="footer" style="margin: 10px 0 0 0;">
<a href="http://ok2nd.web.fc2.com/" target="_blank" style="color:#b0b0ff;">Powered by ok.2nd</a>
</div>
</body>
</html>
<?php
/**
 * Convert the markup of one HTML table into CSV text.
 *
 * Every cell (th or td) is wrapped in double quotes, with embedded double
 * quotes doubled; cells are joined with commas and each row ends with CRLF.
 * Line breaks inside the markup are normalised to CRLF. HTML entities are NOT
 * decoded here; the caller is expected to do that afterwards.
 *
 * @param string $buff   HTML containing a table element
 * @param string $addstr text written verbatim at the start of every row, before
 *                       the first cell (no separator is added); '' for none
 * @return string CSV text, '' when no row is found
 */
function table_tag2csv($buff, $addstr) {
	// Remove whitespace that sits purely between two tags.
	$buff = preg_replace('/>\s+</s', '><', $buff);
	// Keep only what lies between the first opening table tag and the first closing one.
	$buff = preg_replace('/^.*<table[^>]*>/Uis', '', $buff);
	$buff = preg_replace('/<\/table>.*$/is', '', $buff);
	// Reduce opening tags to their bare name so rows and cells can be matched literally.
	// Attributes may follow any whitespace (space, tab, line break), tag names may contain
	// digits (h1), and a self-closing slash (<br/>) is dropped as well.
	$buff = preg_replace('/<([a-z][a-z0-9]*)(?:\s[^>]*|\/)>/i', '<$1>', $buff);
	// Header cells are treated like ordinary cells.
	$buff = preg_replace('/<th>/i', '<td>', $buff);
	$buff = preg_replace('/<\/th>/i', '</td>', $buff);
	// NOTE(review): the bracket expression below is a character class, not an alternation.
	// It removes only tags whose name contains none of the letters t, r, d, so <a>, <span>
	// or <p> are stripped while <br>, <div> or <strong> survive. This is kept unchanged on
	// purpose: the caller converts the surviving <br> tags into line breaks afterwards.
	$buff = preg_replace('/<\/?[^(tr|td)<>]+>/i', '', $buff);
	// Normalise every line break to CRLF.
	$buff = str_replace("\r\n", "\n", $buff);
	$buff = preg_replace('/(\r|\n)/', "\r\n", $buff);

	$csv = '';
	// The "s" modifier lets "." cross line breaks; without it a row or cell whose
	// content spans several lines would not match and would be dropped silently.
	if (preg_match_all('/<tr>(.*)<\/tr>/isU', $buff, $trmatches)) {
		foreach ($trmatches[1] as $row) {
			if ($addstr <> '') {
				$csv .= $addstr;
			}
			if (preg_match_all('/<td>(.*)<\/td>/isU', $row, $tdmatches)) {
				$fields = array();
				foreach ($tdmatches[1] as $cell) {
					// Always quote the field; embedded quotes are escaped by doubling them.
					$fields[] = '"'.str_replace('"', '""', $cell).'"';
				}
				$csv .= implode(',', $fields);
			}
			$csv .= "\r\n";
		}
	}
	return $csv;
}
/* ================================
 * table_tag2csv
 *
 * @create  2010/04/09
 * @author  pentan
 * @url     http://pentan.info/
 *
 * Copyright (c) 2009 pentan.info All Rights Reserved.
 * 著作権表示部分の変更削除は禁止です
 * ================================
 */
/**
 * Upstream version of the table-to-CSV converter, kept unmodified.
 * It is not called anywhere in the visible part of this file.
 *
 * Converts the first table found in $buff to CSV (comma separated, CRLF row
 * ends), sends download headers via header() and returns the CSV text; it does
 * not echo the CSV itself.
 *
 * @param string $buff HTML containing a table element
 * @return string CSV text
 */
function table_tag2csv_original($buff) {
	// Remove whitespace that sits purely between two tags.
	$buff = preg_replace("/>[\s]+</is","><",$buff);
	// Drop everything up to and including the first opening table tag ...
	$buff = preg_replace("/^.*<table[^>]*>/Uis","",$buff);
	// ... and everything from the first closing table tag onwards.
	$buff = preg_replace("/<\/table>.*$/is","",$buff);
	// Strip attributes from opening tags (matches only when exactly one space follows an all-letter tag name).
	$buff = preg_replace("/<([a-z]+) ([^>]+)>/i","<$1>",$buff);
	// Treat header cells as ordinary cells.
	$buff = preg_replace("/<th>/i","<td>",$buff);
	$buff = preg_replace("/<\/th>/i","</td>",$buff);
	// NOTE(review): the bracket expression is a character class, not an alternation, so this
	// removes only tags whose name contains none of the letters t, r, d (e.g. <a>, <span>),
	// while tags such as <br> or <div> are left in place.
	$buff = preg_replace("/<\/?[^(tr|td)<>]+>/i","",$buff);
	// Normalise every line break to CRLF.
	$buff = str_replace("\r\n","\n",$buff);
	$buff = preg_replace("/(\r|\n)/","\r\n",$buff);
	$csv = "";
	// NOTE(review): there is no "s" modifier, so "." stops at a line feed and a row or cell
	// whose content contains a line break does not match and is skipped.
	if(preg_match_all("/<tr>(.*)<\/tr>/iU",$buff,$trmatches)){
		foreach($trmatches[1] as $rows){
			if(preg_match_all("/<td>(.*)<\/td>/iU",$rows,$tdmatches)){
				for($i=0;$i<count($tdmatches[1]);$i++){
					// Quote a cell only when it contains a comma, a double quote or a CRLF.
					if(strpos($tdmatches[1][$i],",")!==false || strpos($tdmatches[1][$i],'"')!==false || strpos($tdmatches[1][$i],"\r\n")!==false){
						$tdmatches[1][$i] = '"'.str_replace('"','""',$tdmatches[1][$i]).'"';
					}
					// Entities are decoded after the quoting decision, so a cell holding an
					// encoded quote ends up with an unescaped quote in the output.
					$tdmatches[1][$i] = htmlspecialchars_decode($tdmatches[1][$i]);
				}
				$csv .= implode(',',$tdmatches[1]);
			}
			// Every matched row ends with CRLF, even when it contained no cell.
			$csv .= "\r\n";
		}
	}
	// Side effect: announce a CSV attachment named after the current date (Y-m-d).
	header('Cache-Control: private, max-age=10800, pre-check=10800');
	header('Content-Disposition: attachment; filename="'.date("Y-m-d").'.csv"');
	header('Pragma: no-cache');
	header('Content-Type: text/comma-separated-values');
	// The length is the byte count of the returned CSV only; the caller must output exactly that string.
	header('Content-Length: ' . strlen($csv));
	return $csv;
}
?>
