#include "StdAfx.h"
#include ".\documentparser.h"

#include "error.h"

// Constructs a parser whose selection criteria (selectInfo_) are
// initialized from v_selectInfo. No other work is done here.
CDocumentParser::CDocumentParser( const CItemSelectInfo& v_selectInfo ) :
	selectInfo_( v_selectInfo )
{
}

// Destructor: nothing to release explicitly in this file.
CDocumentParser::~CDocumentParser()
{
}

// Collects items from v_pDoc and then, recursively, from every embedded
// object of the document that exposes IWebBrowser2 (i.e. its frames).
//
// Parameters:
//   v_pDoc - document to scan; E_POINTER is returned when it is NULL.
//
// Returns S_OK once the document and all of its HTML frames were scanned.
// Other failures are reported through CheckAndThrow (declared outside this
// file; presumably it throws on a failed HRESULT - confirm in error.h).
//
// Frames that have no document yet, or whose document is not an HTML
// document (it does not implement IHTMLDocument2), are skipped so that one
// such frame does not abort the scan of the remaining frames.
HRESULT CDocumentParser::parse( IHTMLDocument2* v_pDoc )
{
	if( v_pDoc == NULL ) {
		return E_POINTER;
	}

	CComPtr<IHTMLDocument2> doc = v_pDoc;

	// Items of this document itself.
	CheckAndThrow( parseDocument( doc ) );

	// The document's container interface enumerates its embedded objects.
	CComPtr<IOleContainer> frames;
	CheckAndThrow( doc.QueryInterface( &frames ) );

	CComPtr<IEnumUnknown> enumerator;
	CheckAndThrow( frames->EnumObjects( OLECONTF_EMBEDDINGS, &enumerator ) );

	IUnknown* pUnk = NULL;
	ULONG uFetched = 0;
	while( S_OK == enumerator->Next( 1, &pUnk, &uFetched ) )
	{
		// Only embedded objects that are web browsers are of interest.
		CComPtr<IWebBrowser2> frame_browser;
		HRESULT hr = pUnk->QueryInterface( IID_IWebBrowser2, (void**)&frame_browser );

		// The enumerator's reference is no longer needed, whatever the result.
		pUnk->Release();
		pUnk = NULL;

		if( FAILED( hr ) ) {
			continue;
		}

		CComPtr<IDispatch> frame_doc_disp;
		CheckAndThrow( frame_browser->get_Document( &frame_doc_disp ) );

		// get_Document may succeed without handing out a document
		// (nothing loaded in the frame yet); there is nothing to scan then.
		if( !frame_doc_disp ) {
			continue;
		}

		// A frame can host a non-HTML document; skip it instead of failing.
		CComPtr<IHTMLDocument2> frame_doc;
		if( FAILED( frame_doc_disp.QueryInterface( &frame_doc ) ) || !frame_doc ) {
			continue;
		}

		CheckAndThrow( parse( frame_doc ) );
	}
	return S_OK;
}

// Scans every element of a single document (frames are NOT followed here)
// and registers, via push_back, the items selected by selectInfo_:
//
//   * <img> elements             -> ItemInfo::ITEMTYPE_IMAGE
//     (optionally only those at least as large as the configured size)
//   * <area>/<a> elements whose href ends in
//       .jpg .jpeg .gif .png .bmp -> ItemInfo::ITEMTYPE_LINKED_IMAGE
//       .htm .html                -> ItemInfo::ITEMTYPE_LINKED_HTML
//     (optionally only those whose href contains the configured substring)
//
// Parameters:
//   v_pDoc - document to scan; E_POINTER is returned when it is NULL.
//
// Returns S_OK when the scan completed. Failures of the collection calls
// are reported through CheckAndThrow (declared outside this file;
// presumably it throws on a failed HRESULT - confirm in error.h).
HRESULT CDocumentParser::parseDocument( IHTMLDocument2* v_pDoc )
{
	if( v_pDoc == NULL ) {
		return E_POINTER;
	}

	CComPtr<IHTMLDocument2> doc = v_pDoc;

	CComPtr<IHTMLElementCollection> all;
	CheckAndThrow( doc->get_all( &all ) );

	long all_count = 0;
	CheckAndThrow( all->get_length( &all_count ) );

	for( long all_idx = 0; all_idx < all_count; all_idx++ ) {
		CComVariant idxVar( all_idx );
		CComPtr<IDispatch> element_disp;
		CheckAndThrow( all->item( idxVar, idxVar, &element_disp ) );

		// item() can report success without returning an element;
		// there is nothing to inspect in that case.
		if( !element_disp ) {
			continue;
		}

		// --- Images embedded in the page ---
		if( selectInfo_.is_select_image() ) {
			CComPtr<IHTMLImgElement > image;
			CComBSTR bstrSrc;
			if( SUCCEEDED( element_disp.QueryInterface( &image ) ) &&
				SUCCEEDED( image->get_src( &bstrSrc.m_str ) ) &&
				bstrSrc.Length() > 0 )
			{
				bool size_check = true;
				if( selectInfo_.is_use_image_size() ) {
					// Failures leave the dimension at 0, which then only
					// passes when the configured minimum is <= 0.
					long width = 0, height = 0;
					image->get_width( &width );
					image->get_height( &height );

					size_check = ( width >= selectInfo_.get_image_size_width() ) && ( height >= selectInfo_.get_image_size_height() );
				}
				if( size_check ) {
					CString src( (LPCWSTR)(BSTR) bstrSrc );
					ItemInfo itemInfo;
					itemInfo.setItem( src );
					itemInfo.setItemType( ItemInfo::ITEMTYPE_IMAGE );
					push_back( itemInfo );
				}
			}
		}

		// --- Links (image maps and anchors) ---
		if( selectInfo_.is_select_linked_image() ||
			selectInfo_.is_select_linked_html() )
		{
			ItemInfo itemInfo;

			CComPtr<IHTMLAnchorElement> anchor;
			CComPtr<IHTMLAreaElement> area;

			// <area> element
			if( SUCCEEDED( element_disp.QueryInterface( &area ) ) ) {
				CComBSTR bstrHref;
				if( SUCCEEDED( area->get_href( &bstrHref.m_str ) ) ) {
					if( bstrHref.Length() > 0 ) {
						CString href( (LPCWSTR)(BSTR) bstrHref );
						itemInfo.setItem( href );
					}
				}
			}
			// <a> element
			else if( SUCCEEDED( element_disp.QueryInterface( &anchor ) ) ) {
				CComBSTR bstrHref;
				if( SUCCEEDED( anchor->get_href( &bstrHref.m_str ) ) ) {
					if( bstrHref.Length() > 0 ) {
						CString href( (LPCWSTR)(BSTR) bstrHref );
						itemInfo.setItem( href );
					}
				}
			}

			if( ! itemInfo.getItem().IsEmpty() ) {
				// Classify the link by file extension. The href is walked
				// backwards from its terminator to the last '.', giving up
				// at the first path separator. CharPrev is used to step
				// back so multi-byte characters are not split.
				// NOTE: the text from the '.' to the end of the href is
				// compared as a whole, so anything following the extension
				// (e.g. "a.jpg?x=1") prevents a match.
				const CString& href = itemInfo.getItem();
				const LPCSTR tmp = href;
				LPCSTR p = tmp;
				while(*p)p++;
				while( p > tmp ) {
					if( *p == '.' ) {
						if( stricmp( p, ".jpg" ) == 0 ||
							stricmp( p, ".jpeg" ) == 0 || 
							stricmp( p, ".gif" ) == 0 ||
							stricmp( p, ".png" ) == 0 ||
							stricmp( p, ".bmp" ) == 0
						)
						{
							if( selectInfo_.is_select_linked_image() ) {
								itemInfo.setItemType( ItemInfo::ITEMTYPE_LINKED_IMAGE );
							}
						}
						else if(
							stricmp( p, ".htm" ) == 0 ||
							stricmp( p, ".html" ) == 0
						)
						{
							if( selectInfo_.is_select_linked_html() ) {
								itemInfo.setItemType( ItemInfo::ITEMTYPE_LINKED_HTML );
							}
						}
						break;
					}
					else if( *p == '/' || *p == '\\' ) {
						break;
					}
					p = CharPrev( tmp, p );
				}

				// Register the link only if it was classified above
				// (assumes a fresh ItemInfo reports ITEMTYPE_NONE - confirm
				// against the ItemInfo declaration).
				if( itemInfo.getItemType() != ItemInfo::ITEMTYPE_NONE ) {
					// Optional filter: href must contain the configured text.
					bool bAccept = true;
					if( selectInfo_.is_use_linked_url() ) {
						bAccept = ( href.Find( selectInfo_.get_linked_url() ) != -1 );
					}
					if( bAccept ) {
						push_back( itemInfo );
					}
				}
			}
		}
	}

	return S_OK;
}
