/**********************************************************************
 
	Copyright (C) 2005- Hirohisa MORI <joshua@nichibun.ac.jp>
 
	This program is free software; you can redistribute it 
	and/or modify it under the terms of the GLOBALBASE 
	Library General Public License (G-LGPL) as published by 

	http://www.globalbase.org/
 
	This program is distributed in the hope that it will be 
	useful, but WITHOUT ANY WARRANTY; without even the 
	implied warranty of MERCHANTABILITY or FITNESS FOR A 
	PARTICULAR PURPOSE.

**********************************************************************/

#include	"memory_debug.h"
#include	"matrix.h"

/* Global switch: when non-zero, xx_mxc_alloc() bypasses the per-cache free
 * lists and allocates every request directly. */
int _debug_flag;

/* Set by write_mx_cache_vector() to a negative code that identifies which
 * check failed before it returned -1. */
int mx_cache_error;

/* Forward declaration; the definition appears further down in this file. */
int
mx_cache_boundary(MX_CACHE * c,int * ix);

//#define MXC_DEBUG	1

/*
 * Allocate `size` bytes of payload through cache `c`.
 *
 * The request is rounded up to a whole number of blocks of
 * (1 << c->mblk_logsize) bytes.  Every allocation is preceded by an MBLK
 * header; the pointer handed back is the first byte after that header.
 *
 *  - If _debug_flag is set, or the block count does not fit the free-list
 *    table (blk > c->mblk_hashsize-1), the memory comes straight from
 *    xx_d_alloc() and is tagged size == MBLK_ALLOC, which makes
 *    xx_mxc_free() release it immediately instead of pooling it.
 *  - Otherwise a block is popped from c->mblk_hash[blk] when one is
 *    available, or freshly allocated and tagged with its block count.
 *
 * When MXC_DEBUG is defined the `if` around the direct path is compiled
 * out, so every request takes the direct path and the code after the
 * first return becomes unreachable.
 *
 * __f / __l are the caller's file and line; they are recorded in the
 * header and forwarded to xx_d_alloc().
 */
void *
xx_mxc_alloc(MX_CACHE * c,int size,char * __f,int __l)
{
int blk,r;
MBLK * ret;
	/* number of whole blocks, rounded up when there is a remainder */
	blk = size >> c->mblk_logsize;
	r = size & ((1<<c->mblk_logsize)-1);
	if ( r )
		blk ++;
#ifndef MXC_DEBUG
	if ( _debug_flag || (blk > c->mblk_hashsize-1) ) {
		ret = xx_d_alloc(sizeof(MBLK)+size,__f,__l);
#else
		ret = xx_d_alloc(sizeof(MBLK)+size,__f,__l);
/*
		ret = xx_d_alloc(sizeof(MBLK)+(blk<<c->mblk_logsize),
					__f,__l);
*/
#endif
		/* direct allocation: marked so that it is never pooled */
		ret->size = MBLK_ALLOC;
		ret->alloc_file = __f;
		ret->alloc_line = __l;
		ret->free_file = 0;
		ret->free_line = 0;
		return ret+1;
#ifndef MXC_DEBUG
	}
#endif
	ret = c->mblk_hash[blk];
	if ( ret == 0 ) {
		/* free list for this size class is empty: allocate a new block */
		ret = xx_d_alloc(sizeof(MBLK)+(blk<<c->mblk_logsize),
					__f,__l);
		ret->size = blk;
		ret->cnt = 1;
		ret->alloc_file = __f;
		ret->alloc_line = __l;
		ret->free_file = 0;
		ret->free_line = 0;
		return ret+1;
	}
	/* reuse the head of the free list; alloc_file/alloc_line keep the
	 * values recorded when the block was first allocated */
	c->mblk_hash[blk] = ret->next;
	ret->cnt ++;
	/* keep the reuse counter from ever reaching the value MBLK_ALLOC */
	if ( ret->cnt == MBLK_ALLOC )
		ret->cnt --;
	/* clearing free_file marks the block as live for xx_mxc_free() */
	ret->free_file = 0;
	ret->free_line = 0;
	return ret+1;
}

/*
 * Give back a pointer obtained from xx_mxc_alloc().
 *
 * The MBLK header in front of the payload decides what happens:
 *  - a header whose free_file is already set triggers er_panic()
 *    (the block was released before);
 *  - size == MBLK_ALLOC: the block was allocated directly and is released
 *    with d_f_ree();
 *  - otherwise size is the size class and the block is pushed onto
 *    c->mblk_hash[size] for reuse.
 *
 * __f / __l are the caller's file and line, stored in the header.
 */
void
xx_mxc_free(MX_CACHE * c,void * ptr,char * __f,int __l)
{
MBLK * hdr;
int slot;

	/* step back from the payload to its header */
	hdr = ((MBLK *)ptr) - 1;

	if ( hdr->free_file != 0 )
		er_panic("xx_mxc_free");

	if ( hdr->size == MBLK_ALLOC ) {
		d_f_ree(hdr);
	}
	else {
		hdr->free_file = __f;
		hdr->free_line = __l;

		slot = hdr->size;
		if ( slot > c->mblk_hashsize-1 )
			er_panic("mxc_free");
		if ( slot < 0 )
			er_panic("mxc_free2");

		/* push onto the free list of this size class */
		hdr->next = c->mblk_hash[slot];
		c->mblk_hash[slot] = hdr;
	}
}

/*
 * Prepare the allocator part of cache `c`.
 *
 * mblk_logsize and mblk_hashsize are replaced by MBLK_LOGSIZE and
 * MBLK_HASHSIZE when the caller left them at 0.  The free-list table is
 * then allocated and cleared, and user_header / gn_stack are reset.
 */
void
mxc_setup(MX_CACHE * c)
{
	c->user_header = 0;
	c->gn_stack = 0;

	if ( !c->mblk_logsize )
		c->mblk_logsize = MBLK_LOGSIZE;
	if ( !c->mblk_hashsize )
		c->mblk_hashsize = MBLK_HASHSIZE;

	/* one list head per size class, all initially empty */
	c->mblk_hash = d_alloc(sizeof(MBLK*)*c->mblk_hashsize);
	memset(c->mblk_hash,0,sizeof(MBLK*)*c->mblk_hashsize);
}


/*
 * Drop the node currently held by cache `c` and, when `f` is non-zero,
 * also release the cache's pooled memory.
 *
 * Node part (only when c->n is set): matrix_node_channel_unlock() is
 * called c->ds_len times with c->dirty as its second argument (matching
 * the c->ds_len get_matrix_node_channel() calls made by get_mx_cache()),
 * then the node itself is unlocked and c->n is cleared.
 *
 * Release part (only when f != 0): the user header's free_func is invoked
 * and every block parked on the free lists is returned with d_f_ree().
 *
 * Fix: the release part used to be skipped whenever no node was cached
 * (early return on c->n == 0), so mxc_close() leaked all pooled blocks if
 * the cache had already been flushed or never loaded a node.  The two
 * parts are now independent.  free_func was already being called with
 * c->n == 0, so it sees the same state as before.
 */
void
flush_mx_cache(MX_CACHE * c,int f)
{
int i;
MBLK * p;
	if ( c->n ) {
		for ( i = 0 ; i < c->ds_len; i ++ )
			matrix_node_channel_unlock(c->n,c->dirty);
		unlock_node(c->n,0);
		c->n = 0;
	}
	if ( f == 0 )
		return;
	if ( c->user_header ) {
		(*c->user_header->free_func)(c);
		c->user_header = 0;
	}
	/* mxc_setup() may not have run yet; nothing to drain in that case */
	if ( c->mblk_hash == 0 )
		return;
	for ( i = 0 ; i < c->mblk_hashsize ; i ++ ) {
		for ( ; c->mblk_hash[i] ; ) {
			p = c->mblk_hash[i];
			c->mblk_hash[i] = p->next;
			d_f_ree(p);
		}
	}
}

/*
 * Shut down cache `c`: flush it with the release flag set, then free the
 * free-list table allocated by mxc_setup().
 *
 * The table pointer is cleared after the free so that a second
 * mxc_close() (or a later flush) does not touch freed memory.
 */
void
mxc_close(MX_CACHE * c)
{
	flush_mx_cache(c,1);
	if ( c->mblk_hash ) {
		d_f_ree(c->mblk_hash);
		c->mblk_hash = 0;
	}
}

/*
 * Restore the most recently saved c->gn settings.
 *
 * Returns 0 after restoring and freeing the top stack entry, or -1 when
 * the stack is empty (c->gn is left untouched in that case).
 */
int
pop_gn(MX_CACHE * c)
{
MX_GN_LIST * top;

	top = c->gn_stack;
	if ( top == 0 )
		return -1;

	c->gn = top->d;
	c->gn_stack = top->next;
	d_f_ree(top);
	return 0;
}



/*
 * Save the current c->gn settings on c->gn_stack and install the three
 * values passed in.  pop_gn() undoes one push.
 */
void
push_gn(MX_CACHE * c,
	int gn_tree_node,
	int gn_create,
	int gn_wait)
{
MX_GN_LIST * saved;

	/* remember the settings that are about to be replaced */
	saved = d_alloc(sizeof(MX_GN_LIST));
	saved->next = c->gn_stack;
	saved->d = c->gn;
	c->gn_stack = saved;

	c->gn.gn_tree_node = gn_tree_node;
	c->gn.gn_create = gn_create;
	c->gn.gn_wait = gn_wait;
}

/*
 * Make cache `c` hold the matrix node addressed by dim-code `src`.
 *
 * If a node is already cached, cmp_dim_code() decides: a zero result is
 * counted as a hit and nothing else happens; a non-zero result flushes
 * the cached node (without releasing pooled memory) and falls into the
 * load path.
 *
 * Load path:
 *  1. get_matrix_node_channel() is called for the first access channel
 *     with the c->gn settings; a node returned without error is unlocked
 *     again right away.
 *     NOTE(review): presumably this call only triggers creation/fetch of
 *     the node -- confirm against get_matrix_node_channel().
 *  2. get_matrix_node_wait() obtains the node; any non-zero err makes the
 *     function return -1.
 *  3. On the first load c->ds and c->el_sizes are allocated (c->ds_len
 *     entries each).
 *  4. For every access channel the channel data is fetched and c->ds[i]
 *     is filled in.  A channel without data leaves c->ds[i] all zero,
 *     including its tp field.
 *  5. Only on the load that allocated the arrays, c->el_sizes[i] is
 *     computed from the data type's get_size callback.
 *
 * Returns 0 on a hit or a successful load, -1 when the node could not be
 * obtained.
 */
int
get_mx_cache(MX_CACHE * c,INTEGER64 * src)
{
MATRIX_NODE * n;
int err;
int i;
void * d;
MATRIX_DATA_TYPE * tp;
int ef;
	if ( c->n ) {
		if ( cmp_dim_code(c->m,c->n->dim_code,src) ) {
			/* cached node is not the requested one: drop it and reload */
			flush_mx_cache(c,0);
			goto load;
		}
		c->hit_cnt ++;
	}
	else {
	load:
		c->miss_hit_cnt ++;
		n = 0;
		err = 0;
		get_matrix_node_channel
			(&err,&n,c->m,
			src,c->access_ch[0],
			c->gn.gn_tree_node,c->gn.gn_create,0,0);
		if ( n && err >= 0 ) {
			/* release what the call above acquired; the node is taken
			 * again below */
			matrix_node_channel_unlock(n,0);
			unlock_node(n,0);
		}
		err = 0;
		n = get_matrix_node_wait(&err,c->m,src,
					c->gn.gn_wait);
		if ( err ) {
//ss_printf("err=%i\n",err);
			return -1;
		}

//ss_printf("CC %s %s %i\n",pt_dc(c->m,src,PTDC_NODE_ID),pt_dc(c->m,n->dim_code,PTDC_NODE_ID),n->status);
		c->n = n;
		/* ef records that the per-channel arrays were created on this call */
		ef = 0;
		if ( c->ds == 0 ) {
			c->ds = d_alloc(sizeof(MATRIX_DH_SET)*
						c->ds_len);
			c->el_sizes = d_alloc(sizeof(int)*c->ds_len);
			ef = 1;
		}
		for ( i = 0 ; i < c->ds_len ; i ++ ) {
			/* err written by this call is not examined; only the
			 * returned data pointer is */
			d = get_matrix_node_channel
				(&err,&n,c->m,src,c->access_ch[i],
				GN_NODE,GN_NODE_CREATE,0,0);
			c->ds[i].tp = tp = c->m->channel_info
					[c->access_ch[i]].data_type;
			if ( d == 0 ) {
				/* no data for this channel: the whole entry, tp
				 * included, becomes zero */
				memset(&c->ds[i],0,sizeof(MATRIX_DH_SET));
			}
			else {
				if ( c->ds[i].tp->parent ) {
					get_matrix_dh_set(&c->ds[i],d);
				}
				else {
					/* type without parent: only tp and offset are set */
					memset(&c->ds[i],0,sizeof(MATRIX_DH_SET));
					c->ds[i].tp = tp;
					c->ds[i].offset = d;
				}
			}
		}
		if ( ef ) {
			for ( i = 0 ; i < c->ds_len ; i ++ ) {
				tp = c->ds[i].tp;
				/* entries zeroed above are skipped, so their
				 * el_sizes slot is left unset */
				if ( tp == 0 )
					continue;
				if ( tp->parent )
					c->el_sizes[i] = (*tp->parent->get_size)(tp,0);
				else	c->el_sizes[i] = (*tp->get_size)(tp,0);
			}
		}
	}
	return 0;
}

/*
 * Check index vector `ix` against the extents of every loaded channel.
 *
 * Channels whose ix or hd field is 0 are ignored.  For the others, each
 * of the c->m->p.dim components must satisfy ix[j] < extent[j].
 *
 * Returns 0 when all components are inside, -1 at the first component
 * that is greater than or equal to its extent.
 */
int
mx_cache_boundary(MX_CACHE * c,int * ix)
{
int ch,axis;
MATRIX_DH_SET * set;

	for ( ch = 0 ; ch < c->ds_len ; ch ++ ) {
		set = c->ds + ch;
		if ( set->ix != 0 && set->hd != 0 ) {
			axis = 0;
			while ( axis < c->m->p.dim ) {
				if ( ix[axis] >= set->ix[axis] )
					return -1;
				axis ++;
			}
		}
	}
	return 0;
}


/*
 * Store one element per selected channel into the node addressed by p->dc.
 *
 * The in-node index is derived per axis from p->dc (shifted by the level
 * in p->dc[0] times dim_divide), optionally displaced by p->ofs, and is
 * verified with mx_cache_boundary().
 *
 * For every channel i whose p->data_ix[i].x is not MXC_INVALID, one
 * element of c->el_sizes[i] bytes is copied from element x of buffer
 * p->data_ptrs[p->data_ix[i].p] into the node, and c->dirty is set to
 * NF_DIRTY.
 *
 * Returns 0 on success, -1 when the node cannot be loaded or the index is
 * outside the node.
 */
int
write_mx_cache(MX_CACHE_PARAM * p)
{
MX_CACHE * c;
MATRIX * m;
MATRIX_DH_SET * set;
int * ofs;
INTEGER64 coord;
char * dst;
char * src;
int ch,axis;
int esz;
int elem,slot;

	if ( get_mx_cache(p->c,p->dc) < 0 )
		return -1;
	c = p->c;
	m = c->m;

	/* in-node index of the target element, one entry per axis */
	ofs = mxc_alloc(c,sizeof(int)*m->p.dim);
	for ( axis = 0 ; axis < m->p.dim ; axis ++ ) {
		coord = p->dc[axis+1] >> (p->dc[0] * m->dim_divide[axis]);
		if ( p->ofs )
			ofs[axis] = (p->ofs[axis] + coord)
				- (coord & -(((INTEGER64)1)<<m->block_size[axis]));
		else
			ofs[axis] = coord &
				((((INTEGER64)1)<<m->block_size[axis])-1);
	}

	if ( mx_cache_boundary(c,ofs) < 0 ) {
		mxc_free(c,ofs);
		return -1;
	}

	for ( ch = 0 ; ch < c->ds_len ; ch ++ ) {
		elem = p->data_ix[ch].x;
		if ( elem == MXC_INVALID )
			continue;
		slot = p->data_ix[ch].p;
		set = &c->ds[ch];

		/* types with a parent hold an array indexed by ofs; the others
		 * hold a single element at offset */
		dst = (char*)set->offset;
		if ( set->tp->parent )
			dst += get_seq_from_ix(ofs,set->ix,m->p.dim)
				* c->el_sizes[ch];

		esz = c->el_sizes[ch];
		src = ((char*)p->data_ptrs[slot]) + esz*elem;
		memcpy(dst,src,esz);
		c->dirty = NF_DIRTY;
	}

	mxc_free(c,ofs);
	return 0;
}

/*
 * Fetch one element per selected channel from the node addressed by
 * p->dc.
 *
 * The in-node index is derived exactly as in write_mx_cache().  The
 * mx_cache_boundary() check is skipped when MXCF_OPTIM_1 is set in
 * p->flags.
 *
 * For every channel i whose p->data_ix[i].x is not MXC_INVALID, one
 * element of c->el_sizes[i] bytes is copied from the node into element x
 * of buffer p->data_ptrs[p->data_ix[i].p].
 *
 * Returns 0 on success, -1 when the node cannot be loaded or the boundary
 * check fails.
 */
int
read_mx_cache(MX_CACHE_PARAM * p)
{
MX_CACHE * c;
MATRIX * m;
MATRIX_DH_SET * set;
int * ofs;
INTEGER64 coord;
char * src;
char * dst;
int ch,axis;
int esz;
int elem,slot;

	if ( get_mx_cache(p->c,p->dc) < 0 )
		return -1;
	c = p->c;
	m = c->m;

	/* in-node index of the requested element, one entry per axis */
	ofs = mxc_alloc(c,sizeof(int)*m->p.dim);
	for ( axis = 0 ; axis < m->p.dim ; axis ++ ) {
		coord = p->dc[axis+1] >> (p->dc[0] * m->dim_divide[axis]);
		if ( p->ofs )
			ofs[axis] = (p->ofs[axis] + coord)
				- (coord & -(((INTEGER64)1)<<m->block_size[axis]));
		else
			ofs[axis] = coord &
				((((INTEGER64)1)<<m->block_size[axis])-1);
	}

	if ( (p->flags & MXCF_OPTIM_1) == 0 &&
			mx_cache_boundary(c,ofs) < 0 ) {
		mxc_free(c,ofs);
		return -1;
	}

	for ( ch = 0 ; ch < c->ds_len ; ch ++ ) {
		elem = p->data_ix[ch].x;
		if ( elem == MXC_INVALID )
			continue;
		slot = p->data_ix[ch].p;
		set = &c->ds[ch];

		/* types with a parent hold an array indexed by ofs; the others
		 * hold a single element at offset */
		src = (char*)set->offset;
		if ( set->tp->parent )
			src += get_seq_from_ix(ofs,set->ix,m->p.dim)
				* c->el_sizes[ch];

		esz = c->el_sizes[ch];
		dst = ((char*)p->data_ptrs[slot]) + elem*esz;
		memcpy(dst,src,esz);
	}

	mxc_free(c,ofs);
	return 0;
}

/*
 * Debug helper: compare `size` bytes at `a` and `b` and print
 * "NOT EQUAL" or "EQU" through ss_printf().  All bytes are always
 * examined; a non-positive size prints "EQU".
 */
void
check_memcpy(char * a,char * b,int size)
{
int k;
int differ;

	differ = 0;
	for ( k = 0 ; k < size ; k ++ ) {
		if ( a[k] != b[k] )
			differ = 1;
	}

	if ( differ ) {
		ss_printf("NOT EQUAL\n");
	}
	else {
		ss_printf("EQU\n");
	}
}



/*
 * Write a run of _p->vector_len consecutive elements along axis 0,
 * starting at dim-code _p->dc and crossing node boundaries as needed.
 *
 * For channel i the source is the buffer
 * ((char**)data_ptrs[data_ix[i].p])[data_ix[i].x]; channels whose x is
 * MXC_INVALID, or whose buffer pointer is 0, are skipped.  When
 * vector_valid is given, only elements whose flag is non-zero are copied.
 *
 * The parameter block is copied, so *_p itself is not modified; the
 * dim-code is advanced in a private copy.
 *
 * Returns the number of elements processed (vector_len - remaining), or
 * -1 on failure with mx_cache_error set to:
 *    -1  initial node could not be loaded
 *    -2  start index outside the node
 *    -3 / -4  start coordinate >= pixel_size / negative
 *    -5  channels disagree on how many elements fit in this node
 *    -6  a selected channel's data type has no parent
 *    -7  no channel was selected
 *    -8 / -9  advanced coordinate >= pixel_size / negative
 *    -10 next node could not be loaded
 */
int
write_mx_cache_vector(MX_CACHE_PARAM * _p)
{
int i,j;
int * ofs;
//MATRIX_DATA_TYPE * tp;
int el_size;
void * p1;
MATRIX * m;
MX_CACHE * c;
INTEGER64 ix,st;
MATRIX_DH_SET * dsp;
int pp,xx;
int len,slen,tlen,plen;
int offset_ix;
char* buffer_ptr;
INTEGER64 * dc;
MX_CACHE_PARAM p;
	p = *_p;

	if ( get_mx_cache(p.c,p.dc) < 0 ) {
		mx_cache_error = -1;
		return -1;
	}
	c = p.c;
	m = c->m;
	/* ofs: index of the first element inside the current node */
	ofs = mxc_alloc(c,sizeof(int)*m->p.dim);
	if ( p.ofs ) {
		for ( i = 0 ; i < m->p.dim ; i ++ ) {
			st = p.dc[i+1] >> (p.dc[0] * m->dim_divide[i]);
			ix = p.ofs[i] + st;
			st &= -(((INTEGER64)1)<<m->block_size[i]);
			ofs[i] = ix - st;
		}
	}
	else {
		for ( i = 0 ; i < m->p.dim ; i ++ )
			ofs[i] = (p.dc[i+1] >> (p.dc[0]
					* m->dim_divide[i])) &
					((((INTEGER64)1) 
						<< m->block_size[i])-1);
	}

	if ( mx_cache_boundary(p.c,ofs) < 0 ) {
		mxc_free(p.c,ofs);
		mx_cache_error = -2;
		return -1;
	}


	/* private, range-checked copy of the dim-code; it is advanced below */
	dc = mxc_alloc(c,sizeof(INTEGER64)*(m->p.dim+1));
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		if ( p.dc[i+1] >= m->pixel_size[i] ) {
			mx_cache_error = -3;
			goto err;
		}
		if ( p.dc[i+1] < 0 ) {
			mx_cache_error = -4;
			goto err;
		}
		dc[i+1] = p.dc[i+1];
	}
	dc[0] = p.dc[0];
	p.dc = dc;
	len = p.vector_len;
	offset_ix = 0;
	if ( len <= 0 )
		goto end;
	/* one pass per node; len counts the elements still to be written */
	for ( ; len > 0 ; ) {

//ss_printf("len %i offset_ix %i\n",len,offset_ix);
		/* plen: elements handled in this node, -1 until a channel sets it */
		plen = -1;
		for ( i = 0 ; i < c->ds_len ; i ++ ) {
			xx = p.data_ix[i].x;
			if ( xx == MXC_INVALID )
				continue;
			pp = p.data_ix[i].p;
			dsp = &c->ds[i];
			if ( dsp->tp->parent ) {
				/* elements left in this node along axis 0 */
				tlen = (dsp->ix[0] - ofs[0]) ; //&0xfffffffe;
				if ( tlen < len )
					slen = tlen;
				else	slen = len;
				if ( plen < 0 )
					plen = slen;
				else if ( plen != slen ) {
					mx_cache_error = -5;
					goto err;
				}

				el_size = c->el_sizes[i];
				p1 = ((char*)dsp->offset)
					+ get_seq_from_ix(ofs,dsp->ix,
						m->p.dim) *
						el_size;
				buffer_ptr = ((char**)p.data_ptrs[pp])[xx];
				if ( buffer_ptr == 0 )
					continue;
				
/*
ss_printf("BB %x %x %x %x\n",
(buffer_ptr + el_size*offset_ix)[0],
(buffer_ptr + el_size*offset_ix)[1],
(buffer_ptr + el_size*offset_ix)[2],
(buffer_ptr + el_size*offset_ix)[3]);
*/
				if ( p.vector_valid ) {
					/* copy element by element, honouring the flags */
					for ( j = 0 ; j < slen ; j ++  ) {
						if ( p.vector_valid[offset_ix+j] == 0 )
							continue;
						memcpy(((char*)p1) + j*el_size,
							buffer_ptr + el_size*(offset_ix+j),
							el_size);
					}
				}
				else
					memcpy(p1,
						buffer_ptr + el_size*offset_ix,
						el_size*slen);
			}
			else {
				mx_cache_error = -6;
				goto err;
			}
			c->dirty = NF_DIRTY;
		}
		if ( plen < 0 ) {
			mx_cache_error = -7;
			goto err;
		}
		len -= plen;
		offset_ix += plen;
	
		if ( len <= 0 )
			break;
	
		/* step the axis-0 coordinate past the part just written and
		 * load the node that contains it */
		p.dc[1] += plen << (p.dc[0]*m->dim_divide[0]);
		for ( i = 0 ; i < m->p.dim ; i ++ ) {
			if ( p.dc[i+1] >= m->pixel_size[i] ) {
				mx_cache_error = -8;
				goto err;
			}
			if ( p.dc[i+1] < 0 ) {
				mx_cache_error = -9;
				goto err;
			}
		}
		if ( get_mx_cache(c,p.dc) < 0 ) {
			mx_cache_error = -10;
			goto err;
		}
		/* subsequent nodes are entered at their first axis-0 element */
		ofs[0] = 0;
	}
end:
	mxc_free(c,dc);
	mxc_free(c,ofs);
	return p.vector_len - len;
err:
	mxc_free(c,dc);
	mxc_free(c,ofs);
	return -1;
}



/*
 * Read a run of _p->vector_len consecutive elements along axis 0,
 * starting at dim-code _p->dc and crossing node boundaries as needed.
 *
 * For channel i the destination is the buffer
 * ((char**)data_ptrs[data_ix[i].p])[data_ix[i].x]; channels whose x is
 * MXC_INVALID, or whose buffer pointer is 0, are skipped.
 *
 * A node that cannot be loaded is not an error here: the run is advanced
 * by one node width (1 << block_size[0], clipped to the remaining length)
 * and the corresponding part of the buffers is left untouched.
 *
 * The parameter block is copied, so *_p itself is not modified.
 *
 * Returns the number of elements processed, or -1 on failure (index
 * outside the node, coordinate out of range, channels disagreeing on the
 * chunk length, a selected channel without parent type, or no channel
 * selected at all).
 *
 * Fixes relative to the previous version:
 *  - the "no channel selected" guard compared plen < -1, which can never
 *    be true because plen starts at -1; the run length then grew instead
 *    of shrinking.  It now uses plen < 0 like write_mx_cache_vector().
 *  - a 0 buffer pointer is skipped, as write_mx_cache_vector() does,
 *    instead of being handed to memcpy().
 */
int
read_mx_cache_vector(MX_CACHE_PARAM * _p)
{
int i;
int * ofs;
int el_size;
void * p1;
MATRIX * m;
MX_CACHE * c;
INTEGER64 st,ix;
int pp,xx;
MATRIX_DH_SET * dsp;
int len,slen,tlen,plen;
int offset_ix;
char* buffer_ptr;
INTEGER64 * dc;
MX_CACHE_PARAM p;
int err_flag;

	/* err_flag: the node for the current position could not be loaded */
	err_flag = 0;
	p = *_p;
	if ( get_mx_cache(p.c,p.dc) < 0 )
		err_flag = 1;
	c = p.c;
	m = c->m;
	/* ofs: index of the first element inside the current node */
	ofs = mxc_alloc(c,sizeof(int)*m->p.dim);
	if ( p.ofs ) {
		for ( i = 0 ; i < m->p.dim ; i ++ ) {
			st = p.dc[i+1] >> (p.dc[0] * m->dim_divide[i]);
			ix = p.ofs[i] + st;
			st &= -(((INTEGER64)1)<<m->block_size[i]);
			ofs[i] = ix - st;
		}
	}
	else {
		for ( i = 0 ; i < m->p.dim ; i ++ )
			ofs[i] = (p.dc[i+1] >> (p.dc[0]
					* m->dim_divide[i])) &
					((((INTEGER64)1)
						<<m->block_size[i])-1);
	}
	if ( (p.flags & MXCF_OPTIM_1) == 0 && err_flag == 0 )
		if ( mx_cache_boundary(p.c,ofs) < 0 ) {
			mxc_free(p.c,ofs);
			return -1;
		}

	/* private, range-checked copy of the dim-code; it is advanced below */
	dc = mxc_alloc(c,sizeof(INTEGER64)*(m->p.dim+1));
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		if ( p.dc[i+1] >= m->pixel_size[i] )
			goto err;
		if ( p.dc[i+1] < 0 )
			goto err;
		dc[i+1] = p.dc[i+1];
	}
	dc[0] = p.dc[0];
	p.dc = dc;

	len = p.vector_len;
	offset_ix = 0;
	if ( len <= 0 )
		goto end;
	/* one pass per node; len counts the elements still to be read */
	for ( ; len > 0 ; ) {

		/* plen: elements handled in this node, -1 until it is set */
		plen = -1;

		if ( err_flag ) {
			/* node missing: skip one node width without copying.
			 * NOTE(review): this does not subtract ofs[0] for a run
			 * that starts in the middle of a missing node -- confirm
			 * whether that is intended. */
			slen = ((int)1)<<m->block_size[0];
			if ( slen > len )
				slen = len;
			plen = slen;
		}
		else {
			for ( i = 0 ; i < c->ds_len ; i ++ ) {
				xx = p.data_ix[i].x;
				if ( xx == MXC_INVALID )
					continue;
				pp = p.data_ix[i].p;
				dsp = &c->ds[i];
				if ( dsp->tp->parent ) {
					/* elements left in this node along axis 0 */
					tlen = (dsp->ix[0] - ofs[0]) ;
					if ( tlen < len )
						slen = tlen;
					else	slen = len;
					if ( plen < 0 )
						plen = slen;
					else if ( plen != slen ) {
						goto err;
					}

					el_size = c->el_sizes[i];
					p1 = ((char*)dsp->offset)
						+ get_seq_from_ix(ofs,dsp->ix,
							m->p.dim) *
							el_size;
					buffer_ptr = ((char**)p.data_ptrs[pp])[xx];
					/* no destination buffer for this channel */
					if ( buffer_ptr == 0 )
						continue;

					memcpy(buffer_ptr + offset_ix*el_size,
						p1,el_size*slen);
				}
				else {
					goto err;
				}
			}
		}
		/* no channel was selected: nothing determined the chunk length */
		if ( plen < 0 ) {
			goto err;
		}
		len -= plen;
		offset_ix += plen;

		if ( len <= 0 )
			break;

		/* step the axis-0 coordinate past the part just read and try
		 * to load the node that contains it */
		p.dc[1] += plen << (p.dc[0]*m->dim_divide[0]);
		for ( i = 0 ; i < m->p.dim ; i ++ ) {
			if ( p.dc[i+1] >= m->pixel_size[i] )
				goto err;
			if ( p.dc[i+1] < 0 )
				goto err;
		}
		err_flag = 0;
		if ( get_mx_cache(c,p.dc) < 0 )
			err_flag = 1;
		/* subsequent nodes are entered at their first axis-0 element */
		ofs[0] = 0;
	}
end:
	mxc_free(c,dc);
	mxc_free(c,ofs);
	return p.vector_len - len;
err:
	mxc_free(c,dc);
	mxc_free(c,ofs);
	return -1;
}


/*
 * Recursive worker behind read_mx_cache_block_vector().
 *
 * For axis < dim the range [dc[axis+1], matrix_max_hrect[axis+1]) is cut
 * into node-sized pieces along that axis; for each piece the function
 * recurses on axis+1 with private copies of dc, matrix_max_hrect and
 * memory_block_offset, advancing memory_block_offset[axis] by one node
 * width per piece.
 *
 * For axis >= dim (leaf) the rectangle lies inside a single node.  The
 * node is loaded and, for every selected channel, the rectangle is copied
 * from the node into the caller's memory block
 * ((char**)data_ptrs[data_ix[i].p])[data_ix[i].x], whose layout is given
 * by memory_block_size.  With vector_valid set the copy is done element
 * by element and only where the flag is non-zero; otherwise whole axis-0
 * rows are copied at once.  result_valid (if given) is updated per
 * element and access_valid[i] (if given) is set to 1 per channel read.
 *
 * Returns 0 on success, -1 when a node cannot be loaded or a selected
 * channel's data type has no parent.
 *
 * Fixes relative to the previous version:
 *  - channels without data were meant to be skipped, but the test was
 *    `dsp == 0` on the address of an array element, which is never 0;
 *    it now tests dsp->tp like _write_mx_cache_block_vector(), avoiding
 *    a NULL dereference of dsp->tp.
 *  - result_valid was written without a NULL check in the
 *    "element not valid" branch although it is optional everywhere else.
 */
int
_read_mx_cache_block_vector(MX_CACHE_PARAM * _p,int axis)
{
MX_CACHE_PARAM p;
MX_CACHE * c;
MATRIX * m;
int dim;
INTEGER64 block_size;
INTEGER64 max;
int er;
int memory_block_size;


int * ofs;
int * end_ofs;
int * inc_ofs;
int * target_src;
int * target_dest;
INTEGER64 st,ix;
int pp,xx;
MATRIX_DH_SET * dsp;
char * buffer_ptr;
int i,j;
int pp1,pp2;
char * p1,* p2;
int len;
int el_size;

	p = *_p;
	c = p.c;
	m = c->m;
	dim = m->p.dim;
	if ( axis >= dim ) {
		/* ---- leaf: the rectangle is inside one node ---- */
		er = 0;
		if ( get_mx_cache(c,p.dc) < 0 )
			return -1;
		/* ofs / end_ofs: first and end index of the rectangle inside
		 * the node; target_src walks it, target_dest is the matching
		 * position in the caller's memory block */
		ofs = mxc_alloc(c,sizeof(int)*m->p.dim);
		end_ofs = mxc_alloc(c,sizeof(int)*m->p.dim);
		inc_ofs = mxc_alloc(c,sizeof(int)*m->p.dim);
		target_src = mxc_alloc(c,sizeof(int)*m->p.dim);
		target_dest = mxc_alloc(c,sizeof(int)*m->p.dim);
		if ( p.ofs ) {
			for ( i = 0 ; i < m->p.dim ; i ++ ) {
				st = p.dc[i+1] >> (p.dc[0] * m->dim_divide[i]);
				ix = p.ofs[i] + st;
				st &= -(((INTEGER64)1)<<m->block_size[i]);
				ofs[i] = ix - st;

				end_ofs[i] = p.matrix_max_hrect[i+1] >> (p.dc[0] * m->dim_divide[i]);
				inc_ofs[i] = 1;
			}
		}
		else {
			for ( i = 0 ; i < m->p.dim ; i ++ ) {
				ofs[i] = (p.dc[i+1] >> (p.dc[0]
						* m->dim_divide[i])) &
						((((INTEGER64)1)
							<<m->block_size[i])-1);

				end_ofs[i] = p.matrix_max_hrect[i+1] >> (p.dc[0] * m->dim_divide[i]);
				inc_ofs[i] = 1;
			}
		}

		if ( p.vector_valid ) {
			/* element-wise copy, filtered by vector_valid */
			for ( i = 0 ; i < c->ds_len ; i ++ ) {
				xx = p.data_ix[i].x;
				if ( xx == MXC_INVALID )
					continue;
				pp = p.data_ix[i].p;
				if ( pp == MXC_INVALID )
					continue;
				buffer_ptr = ((char**)p.data_ptrs[pp])[xx];
				dsp = &c->ds[i];
				/* channel has no data in this node */
				if ( dsp->tp == 0 )
					continue;
				if ( dsp->tp->parent ) {
					el_size = c->el_sizes[i];
					for ( j = 0 ; j < m->p.dim ; j ++ )
						target_src[j] = ofs[j];
					for ( ; ; ) {
						for ( j = 0 ; j < m->p.dim ; j ++ )
							target_dest[j] =
								target_src[j] - ofs[j] +
								p.memory_block_offset[j];
						pp1 = get_seq_from_ix(target_src,dsp->ix,m->p.dim);
						pp2 = get_seq_from_ix(target_dest,
								p.memory_block_size,m->p.dim);
						if ( p.vector_valid[pp2] == 0 ) {
							if ( p.result_valid )
								p.result_valid[pp2] = 0;
							goto next;
						}
						p1 = ((char*)dsp->offset) + pp1 * el_size;
						p2 = buffer_ptr + pp2 * el_size;
						memcpy(p2,p1,el_size);
						if ( p.result_valid )
							p.result_valid[pp2] = 1;
					next:
						if ( inc_ix(target_src,ofs,inc_ofs,end_ofs,m->p.dim) )
							break;
					}
					if ( p.access_valid )
						p.access_valid[i] = 1;
				}
				else {
					er = -1;
					goto err1;
				}
			}
		}
		else {
			/* row-wise copy: a whole axis-0 run of `len` elements per
			 * step; axis 0 is collapsed to one iteration for inc_ix() */
			len = end_ofs[0] - ofs[0];
			end_ofs[0] = ofs[0]+1;
			for ( i = 0 ; i < c->ds_len ; i ++ ) {
				xx = p.data_ix[i].x;
				if ( xx == MXC_INVALID )
					continue;
				pp = p.data_ix[i].p;
				if ( pp == MXC_INVALID )
					continue;
				buffer_ptr = ((char**)p.data_ptrs[pp])[xx];
				dsp = &c->ds[i];
				/* channel has no data in this node */
				if ( dsp->tp == 0 )
					continue;
				if ( dsp->tp->parent ) {
					el_size = c->el_sizes[i];
					for ( j = 0 ; j < m->p.dim ; j ++ )
						target_src[j] = ofs[j];
					for ( ; ; ) {
						for ( j = 0 ; j < m->p.dim ; j ++ )
							target_dest[j] =
								target_src[j] - ofs[j] +
								p.memory_block_offset[j];
						p1 = ((char*)dsp->offset)
							+ (pp1=get_seq_from_ix(target_src,dsp->ix,
								m->p.dim)) *
								el_size;
						p2 = buffer_ptr
							+ (pp2=get_seq_from_ix(target_dest,
								p.memory_block_size,m->p.dim))
								* el_size;
						memcpy(p2,p1,el_size*len);
						if ( p.result_valid ) {
							p2 = (char*)(p.result_valid + pp2);
							for ( j = 0 ; j < len ; j ++ )
								*p2++ = 1;
						}
						if ( inc_ix(target_src,ofs,inc_ofs,end_ofs,m->p.dim) )
							break;
					}
					if ( p.access_valid )
						p.access_valid[i] = 1;
				}
				else {
					er = -1;
					goto err1;
				}
			}
		}
	err1:
		mxc_free(c,ofs);
		mxc_free(c,inc_ofs);
		mxc_free(c,end_ofs);
		mxc_free(c,target_src);
		mxc_free(c,target_dest);
		return er;
	}

	/* ---- inner level: split the range along `axis` into node pieces ---- */
	p.memory_block_offset = mxc_alloc(c,sizeof(int)*dim);
	memcpy(p.memory_block_offset,_p->memory_block_offset,sizeof(int)*dim);
	p.dc = mxc_alloc(c,sizeof(INTEGER64)*(dim+1));
	memcpy(p.dc,_p->dc,sizeof(INTEGER64)*(dim+1));
	p.matrix_max_hrect = mxc_alloc(c,sizeof(INTEGER64)*(dim+1));
	memcpy(p.matrix_max_hrect,_p->matrix_max_hrect,sizeof(INTEGER64)*(dim+1));

	/* node width along this axis, in dim-code units at level dc[0] */
	block_size = ((INTEGER64)1)<<(((INTEGER64)m->dim_divide[axis])*p.dc[0]+m->block_size[axis]);
	max = _p->matrix_max_hrect[axis+1];

	/* end of the first piece: the next node boundary after dc[axis+1] */
	p.matrix_max_hrect[axis+1] = (p.dc[axis+1]+block_size)&(-block_size);
	memory_block_size = 1<<m->block_size[axis];

	er = 0;
	for ( ; p.dc[axis+1] < max ; ) {
		if ( p.matrix_max_hrect[axis+1] > max )
			p.matrix_max_hrect[axis+1] = max;
		er = _read_mx_cache_block_vector(&p,axis+1);
		if ( er < 0 )
			break;
		/* the explicit offset applies to the first piece only */
		p.ofs = 0;
		p.dc[axis+1] = p.matrix_max_hrect[axis+1];
		p.matrix_max_hrect[axis+1] += block_size;
		p.memory_block_offset[axis] += memory_block_size;
	}

	mxc_free(c,p.memory_block_offset);
	mxc_free(c,p.dc);
	mxc_free(c,p.matrix_max_hrect);
	return er;
}


/*
 * Read the rectangle [p->dc, p->matrix_max_hrect) of the matrix into the
 * caller's memory block.
 *
 * dc and matrix_max_hrect are dim-codes: element 0 is the level and
 * element i+1 is the coordinate of axis i.
 *
 * Parameter checks, per axis i (first failure wins):
 *   -2  memory_block_size[i] <= 0
 *   -3  memory_block_offset[i] not below memory_block_size[i]
 *   -4  memory_block_offset[i] negative
 *   -5  dc[i+1] negative
 *   -6  dc[i+1] >= matrix_max_hrect[i+1]
 *   -7  matrix_max_hrect[i+1] >= pixel_size[i]
 * When result_valid is given it is cleared (one byte per element of the
 * memory block) before the read starts.
 *
 * Returns a negative check code as listed above, otherwise the result of
 * _read_mx_cache_block_vector().
 *
 * Fix: the coordinate checks used dc[i] / matrix_max_hrect[i], i.e. they
 * compared the level slot for axis 0 and never looked at the last axis.
 * They now use index i+1, matching how the worker reads both arrays.
 * NOTE(review): matrix_max_hrect is used as an exclusive end by the
 * worker, so `>= pixel_size` rejects a rectangle that ends exactly at the
 * matrix edge -- confirm whether `>` was intended.
 */
int
read_mx_cache_block_vector(MX_CACHE_PARAM * p)
{
int i;
int size;
MATRIX * m;
	m = p->c->m;
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		if ( p->memory_block_size[i] <= 0 )
			return -2;
		if ( p->memory_block_size[i] <= p->memory_block_offset[i] )
			return -3;
		if ( p->memory_block_offset[i] < 0 )
			return -4;
		if ( p->dc[i+1] < 0 )
			return -5;
		if ( p->dc[i+1] >= p->matrix_max_hrect[i+1] )
			return -6;
		if ( p->matrix_max_hrect[i+1] >= m->pixel_size[i] )
			return -7;
	}
	if ( p->result_valid ) {
		/* total number of elements in the memory block */
		size = 1;
		for ( i = 0 ; i < m->p.dim ; i ++ )
			size *= p->memory_block_size[i];
		memset(p->result_valid,0,size);
	}
	return _read_mx_cache_block_vector(p,0);
}



/*
 * Recursive worker behind write_mx_cache_block_vector().
 *
 * For axis < dim the range [dc[axis+1], matrix_max_hrect[axis+1]) is cut
 * into node-sized pieces along that axis; for each piece the function
 * recurses on axis+1 with private copies of dc, matrix_max_hrect and
 * memory_block_offset, advancing memory_block_offset[axis] by one node
 * width per piece.
 *
 * For axis >= dim (leaf) the rectangle lies inside a single node.  The
 * node is loaded and, for every selected channel that has data, the
 * rectangle is copied from the caller's memory block
 * ((char**)data_ptrs[data_ix[i].p])[data_ix[i].x] into the node.  With
 * vector_valid set the copy is done element by element and only where
 * the flag is non-zero; otherwise whole axis-0 rows are copied at once.
 * result_valid (if given) is updated per element and access_valid[i]
 * (if given) is set to 1 per channel written.
 *
 * Returns 0 on success, -1 when a node cannot be loaded or a selected
 * channel's data type has no parent.
 *
 * Fixes relative to the previous version:
 *  - the recursive step called _read_mx_cache_block_vector(), so the
 *    public write entry point never reached the write leaf; it now
 *    recurses into itself.
 *  - result_valid was written without a NULL check in the
 *    "element not valid" branch although it is optional everywhere else.
 *  - after copying into the node c->dirty is set to NF_DIRTY, as
 *    write_mx_cache() and write_mx_cache_vector() do.
 */
int
_write_mx_cache_block_vector(MX_CACHE_PARAM * _p,int axis)
{
MX_CACHE_PARAM p;
MX_CACHE * c;
MATRIX * m;
int dim;
INTEGER64 block_size;
INTEGER64 max;
int er;
int memory_block_size;


int * ofs;
int * end_ofs;
int * inc_ofs;
int * target_src;
int * target_dest;
INTEGER64 st,ix;
int pp,xx;
MATRIX_DH_SET * dsp;
char * buffer_ptr;
int i,j;
int pp1,pp2;
char * p1,* p2;
int len;
int el_size;

	p = *_p;
	c = p.c;
	m = c->m;
	dim = m->p.dim;
	if ( axis >= dim ) {
		/* ---- leaf: the rectangle is inside one node ---- */
		er = 0;
		if ( get_mx_cache(c,p.dc) < 0 )
			return -1;
		/* ofs / end_ofs: first and end index of the rectangle inside
		 * the node; target_src walks it, target_dest is the matching
		 * position in the caller's memory block */
		ofs = mxc_alloc(c,sizeof(int)*m->p.dim);
		end_ofs = mxc_alloc(c,sizeof(int)*m->p.dim);
		inc_ofs = mxc_alloc(c,sizeof(int)*m->p.dim);
		target_src = mxc_alloc(c,sizeof(int)*m->p.dim);
		target_dest = mxc_alloc(c,sizeof(int)*m->p.dim);
		if ( p.ofs ) {
			for ( i = 0 ; i < m->p.dim ; i ++ ) {
				st = p.dc[i+1] >> (p.dc[0] * m->dim_divide[i]);
				ix = p.ofs[i] + st;
				st &= -(((INTEGER64)1)<<m->block_size[i]);
				ofs[i] = ix - st;

				end_ofs[i] = p.matrix_max_hrect[i+1] >> (p.dc[0] * m->dim_divide[i]);
				inc_ofs[i] = 1;
			}
		}
		else {
			for ( i = 0 ; i < m->p.dim ; i ++ ) {
				ofs[i] = (p.dc[i+1] >> (p.dc[0]
						* m->dim_divide[i])) &
						((((INTEGER64)1)
							<<m->block_size[i])-1);

				end_ofs[i] = p.matrix_max_hrect[i+1] >> (p.dc[0] * m->dim_divide[i]);
				inc_ofs[i] = 1;
			}
		}

		if ( p.vector_valid ) {
			/* element-wise copy, filtered by vector_valid */
			for ( i = 0 ; i < c->ds_len ; i ++ ) {
				xx = p.data_ix[i].x;
				if ( xx == MXC_INVALID )
					continue;
				pp = p.data_ix[i].p;
				if ( pp == MXC_INVALID )
					continue;
				buffer_ptr = ((char**)p.data_ptrs[pp])[xx];
				dsp = &c->ds[i];
				/* channel has no data in this node */
				if ( dsp->tp == 0 )
					continue;
				if ( dsp->tp->parent ) {
					el_size = c->el_sizes[i];
					for ( j = 0 ; j < m->p.dim ; j ++ )
						target_src[j] = ofs[j];
					for ( ; ; ) {
						for ( j = 0 ; j < m->p.dim ; j ++ )
							target_dest[j] =
								target_src[j] - ofs[j] +
								p.memory_block_offset[j];
						pp1 = get_seq_from_ix(target_src,dsp->ix,m->p.dim);
						pp2 = get_seq_from_ix(target_dest,
								p.memory_block_size,m->p.dim);
						if ( p.vector_valid[pp2] == 0 ) {
							if ( p.result_valid )
								p.result_valid[pp2] = 0;
							goto next;
						}
						p1 = ((char*)dsp->offset) + pp1 * el_size;
						p2 = buffer_ptr + pp2 * el_size;
						memcpy(p1,p2,el_size);
						if ( p.result_valid )
							p.result_valid[pp2] = 1;
					next:
						if ( inc_ix(target_src,ofs,inc_ofs,end_ofs,m->p.dim) )
							break;
					}
					if ( p.access_valid )
						p.access_valid[i] = 1;
					/* node content may have changed */
					c->dirty = NF_DIRTY;
				}
				else {
					er = -1;
					goto err1;
				}
			}
		}
		else {
			/* row-wise copy: a whole axis-0 run of `len` elements per
			 * step; axis 0 is collapsed to one iteration for inc_ix() */
			len = end_ofs[0] - ofs[0];
			end_ofs[0] = ofs[0]+1;
			for ( i = 0 ; i < c->ds_len ; i ++ ) {
				xx = p.data_ix[i].x;
				if ( xx == MXC_INVALID )
					continue;
				pp = p.data_ix[i].p;
				if ( pp == MXC_INVALID )
					continue;
				buffer_ptr = ((char**)p.data_ptrs[pp])[xx];
				dsp = &c->ds[i];
				/* channel has no data in this node */
				if ( dsp->tp == 0 )
					continue;
				if ( dsp->tp->parent ) {
					el_size = c->el_sizes[i];
					for ( j = 0 ; j < m->p.dim ; j ++ )
						target_src[j] = ofs[j];
					for ( ; ; ) {
						for ( j = 0 ; j < m->p.dim ; j ++ )
							target_dest[j] =
								target_src[j] - ofs[j] +
								p.memory_block_offset[j];
						p1 = ((char*)dsp->offset)
							+ (pp1=get_seq_from_ix(target_src,dsp->ix,
								m->p.dim)) *
								el_size;
						p2 = buffer_ptr
							+ (pp2=get_seq_from_ix(target_dest,
								p.memory_block_size,m->p.dim))
								* el_size;
						memcpy(p1,p2,el_size*len);
						if ( p.result_valid ) {
							p2 = (char*)(p.result_valid + pp2);
							for ( j = 0 ; j < len ; j ++ )
								*p2++ = 1;
						}
						if ( inc_ix(target_src,ofs,inc_ofs,end_ofs,m->p.dim) )
							break;
					}
					if ( p.access_valid )
						p.access_valid[i] = 1;
					/* node content has changed */
					c->dirty = NF_DIRTY;
				}
				else {
					er = -1;
					goto err1;
				}
			}
		}
	err1:
		mxc_free(c,ofs);
		mxc_free(c,inc_ofs);
		mxc_free(c,end_ofs);
		mxc_free(c,target_src);
		mxc_free(c,target_dest);
		return er;
	}

	/* ---- inner level: split the range along `axis` into node pieces ---- */
	p.memory_block_offset = mxc_alloc(c,sizeof(int)*dim);
	memcpy(p.memory_block_offset,_p->memory_block_offset,sizeof(int)*dim);
	p.dc = mxc_alloc(c,sizeof(INTEGER64)*(dim+1));
	memcpy(p.dc,_p->dc,sizeof(INTEGER64)*(dim+1));
	p.matrix_max_hrect = mxc_alloc(c,sizeof(INTEGER64)*(dim+1));
	memcpy(p.matrix_max_hrect,_p->matrix_max_hrect,sizeof(INTEGER64)*(dim+1));

	/* node width along this axis, in dim-code units at level dc[0] */
	block_size = ((INTEGER64)1)<<(((INTEGER64)m->dim_divide[axis])*p.dc[0]+m->block_size[axis]);
	max = _p->matrix_max_hrect[axis+1];

	/* end of the first piece: the next node boundary after dc[axis+1] */
	p.matrix_max_hrect[axis+1] = (p.dc[axis+1]+block_size)&(-block_size);
	memory_block_size = 1<<m->block_size[axis];

	er = 0;
	for ( ; p.dc[axis+1] < max ; ) {
		if ( p.matrix_max_hrect[axis+1] > max )
			p.matrix_max_hrect[axis+1] = max;
		er = _write_mx_cache_block_vector(&p,axis+1);
		if ( er < 0 )
			break;
		/* the explicit offset applies to the first piece only */
		p.ofs = 0;
		p.dc[axis+1] = p.matrix_max_hrect[axis+1];
		p.matrix_max_hrect[axis+1] += block_size;
		p.memory_block_offset[axis] += memory_block_size;
	}

	mxc_free(c,p.memory_block_offset);
	mxc_free(c,p.dc);
	mxc_free(c,p.matrix_max_hrect);
	return er;
}


/*
 * Write the caller's memory block into the rectangle
 * [p->dc, p->matrix_max_hrect) of the matrix.
 *
 * dc and matrix_max_hrect are dim-codes: element 0 is the level and
 * element i+1 is the coordinate of axis i.
 *
 * Parameter checks, per axis i (first failure wins):
 *   -2  memory_block_size[i] <= 0
 *   -3  memory_block_offset[i] not below memory_block_size[i]
 *   -4  memory_block_offset[i] negative
 *   -5  dc[i+1] negative
 *   -6  dc[i+1] >= matrix_max_hrect[i+1]
 *   -7  matrix_max_hrect[i+1] >= pixel_size[i]
 * When given, result_valid (one byte per memory-block element) and
 * access_valid (one entry per cache channel) are cleared before the write
 * starts.
 *
 * Returns a negative check code as listed above, otherwise the result of
 * _write_mx_cache_block_vector().
 *
 * Fixes relative to the previous version:
 *  - the coordinate checks used dc[i] / matrix_max_hrect[i], i.e. they
 *    compared the level slot for axis 0 and never looked at the last
 *    axis; they now use index i+1, matching how the worker reads both
 *    arrays.
 *  - the access_valid loop assigned 0 to the pointer itself, which threw
 *    away the caller's array instead of clearing its entries.
 * NOTE(review): matrix_max_hrect is used as an exclusive end by the
 * worker, so `>= pixel_size` rejects a rectangle that ends exactly at the
 * matrix edge -- confirm whether `>` was intended.
 */
int
write_mx_cache_block_vector(MX_CACHE_PARAM * p)
{
int i;
int size;
MATRIX * m;
	m = p->c->m;
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		if ( p->memory_block_size[i] <= 0 )
			return -2;
		if ( p->memory_block_size[i] <= p->memory_block_offset[i] )
			return -3;
		if ( p->memory_block_offset[i] < 0 )
			return -4;
		if ( p->dc[i+1] < 0 )
			return -5;
		if ( p->dc[i+1] >= p->matrix_max_hrect[i+1] )
			return -6;
		if ( p->matrix_max_hrect[i+1] >= m->pixel_size[i] )
			return -7;
	}
	if ( p->result_valid ) {
		/* total number of elements in the memory block */
		size = 1;
		for ( i = 0 ; i < m->p.dim ; i ++ )
			size *= p->memory_block_size[i];
		memset(p->result_valid,0,size);
	}
	if ( p->access_valid ) {
		for ( i = 0 ; i < p->c->ds_len ; i ++ )
			p->access_valid[i] = 0;
	}
	return _write_mx_cache_block_vector(p,0);
}




/* Work area handed from mx_hem() to mx_hem_func() through matrix_scan(). */
typedef struct mx_hem_t {
	MX_CACHE_PARAM *	p;	/* parameter block shared by all callbacks */
	int			size;	/* number of entries in p->result_valid */
} MX_HEM_T;

/*
 * matrix_scan() callback used by mx_hem(); sw->work is the MX_HEM_T set
 * up there and sw->n / sw->dim_code identify the node being visited.
 *
 * For each axis i a rectangle is built from the node's dim-code and the
 * extents (ds.ix) of the first access channel: along axis i it covers the
 * single index ds.ix[i]-1, along the other axes it starts at 0.  That
 * rectangle is read with read_mx_cache_block_vector() into the shared
 * memory block and then written back with write_mx_cache_block_vector()
 * using sw->dim_code plus the same per-axis offset.  Elements the read
 * did not mark in result_valid are excluded from the write by passing
 * result_valid as vector_valid.
 *
 * The return values of the read and write calls are not examined; the
 * function always returns 0.
 *
 * Fixes relative to the previous version:
 *  - start_ix / end_ix are dim-codes (level + one entry per axis) and are
 *    written up to index dim, but were allocated with only dim entries.
 *  - h->p->matrix_max_hrect, h->p->dc and h->p->ofs were left pointing
 *    at buffers freed here (or owned by the scanner); the caller's
 *    values are now restored before returning, so the caller can still
 *    free what it allocated.
 *  - access_valid is optional in the block-vector API but was
 *    dereferenced unconditionally; when absent every channel is treated
 *    as accessed.  When present it is cleared before the read so that it
 *    reflects that read only.
 *  - two scratch buffers that were allocated and freed without ever
 *    being used are gone.
 */
int
mx_hem_func(MATRIX_SCAN_T * sw)
{
MX_HEM_T * h;
MATRIX * m;
int * ofs;
INTEGER64 * start_ix;
INTEGER64 * end_ix;
int i,j;
MATRIX_NODE * n;
MX_CACHE * c;
MATRIX_DH_SET ds;
MX_CACHE_PARAM saved;
	h = sw->work;
	c = h->p->c;
	m = c->m;
	/* remember the caller's pointers that are borrowed below */
	saved = *h->p;

	ofs = d_alloc(sizeof(int)*m->p.dim);
	start_ix = d_alloc(sizeof(INTEGER64)*(m->p.dim+1));
	end_ix = d_alloc(sizeof(INTEGER64)*(m->p.dim+1));

	n = sw->n;
	ds.tp = m->channel_info[c->access_ch[0]].data_type;
	get_matrix_dh_set(&ds,n->channel[c->access_ch[0]].data);

	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		for ( j = 0 ; j < m->p.dim ; j ++ ) {
			if ( j == i ) {
				ofs[j] = ds.ix[j]-1;
				end_ix[j+1] = sw->dim_code[j+1] + ds.ix[j];
			}
			else if ( j < i ) {
				ofs[j] = 0;
				end_ix[j+1] = sw->dim_code[j+1] + ds.ix[j]-1;
			}
			else {
				ofs[j] = 0;
				end_ix[j+1] = sw->dim_code[j+1] + ds.ix[j];
			}
			start_ix[j+1] = sw->dim_code[j+1]+ofs[j];
		}
		start_ix[0] = end_ix[0] = sw->dim_code[0];

		for ( j = 0 ; j < h->p->c->ds_len ; j ++ )
			h->p->data_ix[j].p = 0;
		if ( h->p->access_valid ) {
			for ( j = 0 ; j < h->p->c->ds_len ; j ++ )
				h->p->access_valid[j] = 0;
		}

		h->p->matrix_max_hrect = end_ix;
		h->p->ofs = 0;
		h->p->dc = start_ix;
		h->p->vector_valid = 0;

		read_mx_cache_block_vector(h->p);

		/* only channels the read actually touched take part in the write */
		for ( j = 0 ; j < h->p->c->ds_len ; j ++ ) {
			if ( h->p->access_valid && h->p->access_valid[j] == 0 )
				h->p->data_ix[j].p = MXC_INVALID;
			else	h->p->data_ix[j].p = 0;
		}
		for ( j = 0 ; j < h->size ; j ++ )
			if ( h->p->result_valid[j] == 0 )
				goto vv;
		goto ok;
	vv:
		/* some elements were not read: use the read's result flags as
		 * the write's element filter */
		h->p->vector_valid = h->p->result_valid;
		h->p->result_valid = 0;
	ok:
		h->p->ofs = ofs;
		h->p->dc = sw->dim_code;
		write_mx_cache_block_vector(h->p);

		if ( h->p->vector_valid ) {
			h->p->result_valid = h->p->vector_valid;
			h->p->vector_valid = 0;
		}
	}

	/* hand the borrowed fields back before the scratch buffers go away */
	h->p->matrix_max_hrect = saved.matrix_max_hrect;
	h->p->ofs = saved.ofs;
	h->p->dc = saved.dc;

	d_f_ree(ofs);
	d_f_ree(start_ix);
	d_f_ree(end_ix);

	return 0;
}

/*
 * Run mx_hem_func() over every node of matrix `m`.
 *
 * A temporary MX_CACHE and MX_CACHE_PARAM are built on the stack.  Every
 * channel that has a data type with a parent becomes an access channel
 * and gets its own transfer buffer sized for a memory block of
 * ((1 << dim_divide[i]) + 1) elements per axis.  matrix_scan() is then
 * started over the whole pixel range at level 0.
 *
 * Always returns 0; the status returned by matrix_scan() is not
 * propagated (unchanged from the previous version).
 *
 * Fixes relative to the previous version:
 *  - the cleanup loop freed the table of buffer pointers c.ds_len times
 *    instead of freeing each buffer, and then freed the table again.
 *  - p.matrix_max_hrect / p.result_valid / p.access_valid may be
 *    reassigned by the callback and the block-vector calls; the buffers
 *    allocated here are now tracked in locals and freed through those.
 *  - access_valid, which mx_hem_func() reads, was never allocated.
 *  - matrix_max_hrect is a dim-code (dim+1 entries) but was allocated
 *    with dim entries.
 *  - the cache was never closed, leaving the last node locked and the
 *    free-list table and the per-channel arrays allocated by
 *    get_mx_cache() unreleased.
 */
int
mx_hem(MATRIX * m)
{
int i,j;
MX_HEM_T h;
INTEGER64 * start_dc;
INTEGER64 * end_dc;
MX_CACHE c;
MX_CACHE_PARAM p;
int sz;
int size;
int a;
MATRIX_DATA_TYPE * tp;
char ** bufs;
void * own_max_hrect;
void * own_result_valid;
void * own_access_valid;


	sync_matrix(m);

	memset(&c,0,sizeof(c));
	memset(&p,0,sizeof(p));

	c.m = m;
	c.access_ch = d_alloc(sizeof(int)*m->p.channel_nos);
	c.gn.gn_tree_node = GN_NODE;
	c.gn.gn_create = GN_ERROR_NORETRY;
	c.gn.gn_wait = GN_ERROR_NORETRY;

	p.data_ix = d_alloc(sizeof(MX_CACHE_PARAM_IX)*m->p.channel_nos);

	/* data_ptrs[0] is a table with one transfer buffer per access channel */
	bufs = d_alloc(sizeof(void*)*m->p.channel_nos);
	p.data_ptrs[0] = bufs;
	p.data_ptrs[1] = p.data_ptrs[2] = p.data_ptrs[3] = 0;
	p.c = &c;
	p.vector_valid = 0;
	p.memory_block_size = d_alloc(sizeof(int)*m->p.dim);
	p.memory_block_offset = d_alloc(sizeof(int)*m->p.dim);
	own_max_hrect = d_alloc(sizeof(INTEGER64)*(m->p.dim+1));
	p.matrix_max_hrect = own_max_hrect;

	/* size: number of elements in the memory block */
	size = 1;
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		a =  (((int)1)<<m->dim_divide[i])+1;
		size *= a;
		p.memory_block_size[i] = a;
		p.memory_block_offset[i] = 0;
	}
	own_result_valid = d_alloc(size);
	p.result_valid = own_result_valid;

	/* one flag per possible access channel, initially "not accessed" */
	own_access_valid = d_alloc(sizeof(*p.access_valid)*m->p.channel_nos);
	memset(own_access_valid,0,sizeof(*p.access_valid)*m->p.channel_nos);
	p.access_valid = own_access_valid;

	j = 0;
	h.p = &p;
	for ( i = 0 ; i < m->p.channel_nos ; i ++ ) {
		if ( m->channel_info[i].data_type == 0 ) {
			continue;
		}
		else {
			c.access_ch[j] = i;
			tp = m->channel_info[i].data_type;
			/* slot j is reused by the next channel when this one has
			 * no parent type */
			if ( tp->parent == 0 )
				continue;
			sz = (*tp->parent->get_size)(tp,0);
			p.data_ix[j].p = 0;
			p.data_ix[j].x = j;
			bufs[j] = d_alloc(sz*size);
			j ++;
		}
	}
	c.ds_len = j;
	mxc_setup(&c);


	start_dc = d_alloc(sizeof(INTEGER64)*(m->p.dim+1));
	end_dc = d_alloc(sizeof(INTEGER64)*(m->p.dim+1));
	for ( i = 0 ; i < m->p.dim ; i ++ ) {
		start_dc[i+1] = 0;
		end_dc[i+1] = m->pixel_size[i];
	}
	start_dc[0] = end_dc[0] = 0;

	h.size = size;

	matrix_scan(m,0,-1,start_dc,end_dc,mx_hem_func,&h);

	/* release the cached node and the cache's pooled memory */
	mxc_close(&c);
	if ( c.ds )
		d_f_ree(c.ds);
	if ( c.el_sizes )
		d_f_ree(c.el_sizes);

	d_f_ree(start_dc);
	d_f_ree(end_dc);
	d_f_ree(own_result_valid);
	d_f_ree(own_access_valid);
	d_f_ree(p.memory_block_size);
	d_f_ree(p.memory_block_offset);
	d_f_ree(own_max_hrect);
	d_f_ree(p.data_ix);
	d_f_ree(c.access_ch);

	/* the transfer buffers first, then the table that holds them */
	for ( i = 0 ; i < c.ds_len ; i ++ )
		d_f_ree(bufs[i]);
	d_f_ree(bufs);

	return 0;
}








