=========================================================
ϡ
linux-2.6.26.5/Documentation/filesystems/inotify.txt 
Ǥ
Ρ JF ץ < http://www.linux.or.jp/JF/ >
  2008/10/15
  Seiji Kaneko < skaneko at mbn dot or dot jp >
=========================================================
				   inotify
#	    a powerful yet simple file change notification system
		    ϤñʥեѹΥƥ



#Document started 15 Mar 2005 by Robert Love <rml@novell.com>
ʸ񲽤 15 Mar 2005  Robert Love <rml@novell.com> ˤäƳϤ



#(i) User Interface
(i) 桼󥿡ե

#Inotify is controlled by a set of three system calls and normal file I/O on a
#returned file descriptor.
Inotify  3 ĤΥƥॳȤȡ֤줿եǥץФ
̾ I/O 椵ޤ

#First step in using inotify is to initialise an inotify instance:
inotify Ȥκǽμϡinotify 󥹥󥹤뤳ȤǤ

	int fd = inotify_init ();

#Each instance is associated with a unique, ordered queue.
ƥ󥹥󥹤ϡŤ줿󥹥̤Υ塼˴ϢŤƤޤ

#Change events are managed by "watches".  A watch is an (object,mask) pair where
#the object is a file or directory and the mask is a bit mask of one or more
#inotify events that the application wishes to receive.  See <linux/inotify.h>
#for valid events.  A watch is referenced by a watch descriptor, or wd.
٥Ȥϡ"watch" Ǵޤ watch (ƻ)  (о, ޥ)
ΥڥǤꡢоݤȤϰĤΥեޤϥǥ쥯ȥǡޥϥӥåȥޥ
Ǥꡢץꥱ󤬼Ȼפ inotify ٥ȤĤʤʣ
ꤷޤͭʥ٥ȤˤĤƤ <linux/inotify.h> 򻲾Ȥ
watch  watch ǥץ (wd) ȤäƻȤޤ

#Watches are added via a path to the file.
watch ϡեؤΥѥȤäղäޤ

#Watches on a directory will return events on any files inside of the directory.
ǥ쥯ȥФ watch ϡǥ쥯ȥΤɤΥեФ륤٥
𤷤ޤ

#Adding a watch is simple:
watch ɲäΤϴñǤ

	int wd = inotify_add_watch (fd, path, mask);

#Where "fd" is the return value from inotify_init(), path is the path to the
#object to watch, and mask is the watch mask (see <linux/inotify.h>).
 wd  inotify_init() ͤǤꡢpath ϴƻоݤΥѥǤꡢ
mask  watch ޥǤ (<linux/inotify.h> )

#You can update an existing watch in the same manner, by passing in a new mask.
 mask Ϳ뤳ȤˤꡢƱǴ¸ watch 򹹿Ǥޤ

#An existing watch is removed via
¸ watch ϰʲμǺǤޤ

	int ret = inotify_rm_watch (fd, wd);

#Events are provided in the form of an inotify_event structure that is read(2)
#from a given inotify instance.  The filename is of dynamic length and follows
#the struct. It is of size len.  The filename is padded with null bytes to
#ensure proper alignment.  This padding is reflected in len.
٥Ȥ inotify_event ¤Τη󶡤졢Ϳ줿 inotify 󥹥
ɤ (read(2)) ȤǤޤե̾ưŪĹꤵ졢¤
Τ³ޤ̾Ĺ size len Ǥե̾Ŭڤʥ饤ȤȤʤ
褦 null ХȤǥѥǥ󥰤ޤlen ϥѥǥ󥰤ޤĹǤ

#You can slurp multiple events by passing a large buffer, for example
ʣΥ٥ȤĹХåեͿ뤳ȤˤäƤƤȤǤޤ
㤨

	size_t len = read (fd, buf, BUF_LEN);

#Where "buf" is a pointer to an array of "inotify_event" structures at least
#BUF_LEN bytes in size.  The above example will return as many events as are
#available and fit in BUF_LEN.
ǡbuf  "inotify_event" ¤ؤΥݥ󥿤ǡʤȤ BUF_LEN
ХȤΥޤ嵭ǤϡBUF_LEN ˳ǼǤ롢󶡲ǽ
Υ٥Ȥ֤ޤ

#Each inotify instance fd is also select()- and poll()-able.
 inotify 󥹥󥹤Υեǥץ select()  poll() μ¹
ǽǤ

#You can find the size of the current event queue via the standard FIONREAD
#ioctl on the fd returned by inotify_init().
ߤΥ٥ȥ塼Ĺϡinotify_init() ֤եǥץ
Фɸ FIONREAD ioctl ȯԤ뤳ȤˤΤ뤳ȤǤޤ

#All watches are destroyed and cleaned up on close.
٤Ƥ watch ϥ˲ˤޤ


#(ii) Prototypes:
(ii) ץȥ:

	int inotify_init (void);
	int inotify_add_watch (int fd, const char *path, __u32 mask);
	int inotify_rm_watch (int fd, __u32 mask);


#(iii) Kernel Interface
(iii) ͥ륤󥿡ե

#Inotify's kernel API consists a set of functions for managing watches and an
#event callback.
Inotify Υͥ API ϡwatch ٥Ȥ뤿δĤδؿ
ʤäƤޤ

#To use the kernel API, you must first initialize an inotify instance with a set
#of inotify_operations.  You are given an opaque inotify_handle, which you use
#for any further calls to inotify.
ͥ API Ȥˤϡޤ inotify 󥹥󥹤Ϣ inotify 
ؿǽʤФޤ󡣤ˤꡢʹߤ inotify ѤƤӽ
ԤʤΤ˻Ȥ̤Ƥ inotify_handle Ǥޤ

    struct inotify_handle *ih = inotify_init(my_event_handler);

#You must provide a function for processing events and a function for destroying
#the inotify watch.
٥Ȥؿȡinotify watch ˴ؿѰդʤФ
ޤ

    void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
    	              u32 cookie, const char *name, struct inode *inode)

#	watch - the pointer to the inotify_watch that triggered this call
#	wd - the watch descriptor
#	mask - describes the event that occurred
#	cookie - an identifier for synchronizing events
#	name - the dentry name for affected files in a directory-based event
#	inode - the affected inode in a directory-based event
	watch - θƤӽФȯ inotify_watch ؤΥݥ
	wd - watch ǥץ
	mask - ȯ٥Ȥ򵭺ܤ
	cookie - Ʊ٥Ȥμ̻
	name - ǥ쥯ȥ١Υ٥Ȥξ硢ƶե
	     dentry ̾
	inode -ǥ쥯ȥ١Υ٥Ȥξ硢ƶ inode

    void destroy_watch(struct inotify_watch *watch)

#You may add watches by providing a pre-allocated and initialized inotify_watch
#structure and specifying the inode to watch along with an inotify event mask.
#You must pin the inode during the call.  You will likely wish to embed the
#inotify_watch structure in a structure of your own which contains other
#information about the watch.  Once you add an inotify watch, it is immediately
#subject to removal depending on filesystem events.  You must grab a reference if
#you depend on the watch hanging around after the call.
˳Ƥƽ줿 inotify_watch ¤Τ˴ƻ
ݤȤʤ inode  inotify ٥ȥޥꤹ뤳Ȥˤ watch ɲä
ޤƤӽФˤ inode ꤷƤʤФޤ󡣤ޤ
inotify_watch ¤Τǡ watch ˴ؤǼ
¤ΤߤʤǤ礦inotify_watch ɲäϡľ夫ե
ƥ।٥Ȥη̤ˤоݤȤʤޤƤӽФ watch Ĥä
ƤۤʤСݥ󥿤ĤޤޤˤƤʤФʤޤ

#    inotify_init_watch(&my_watch->iwatch);
#    inotify_get_watch(&my_watch->iwatch);	// optional
#    s32 wd = inotify_add_watch(ih, &my_watch->iwatch, inode, mask);
#    inotify_put_watch(&my_watch->iwatch);	// optional
    inotify_init_watch(&my_watch->iwatch);
    inotify_get_watch(&my_watch->iwatch);	// ץ
    s32 wd = inotify_add_watch(ih, &my_watch->iwatch, inode, mask);
    inotify_put_watch(&my_watch->iwatch);	// ץ

#You may use the watch descriptor (wd) or the address of the inotify_watch for
#other inotify operations.  You must not directly read or manipulate data in the
#inotify_watch.  Additionally, you must not call inotify_add_watch() more than
#once for a given inotify_watch structure, unless you have first called either
#inotify_rm_watch() or inotify_rm_wd().
watch ǥץ (wd)  inotify_watch Υɥ쥹Τɤ inotify 
Ѥ뤳ȤǤޤinotify_watch Υǡľɤ߽񤭤ƤϤ
ޤ󡣤ޤ inotify_watch ¤ΤФơޤ inotify_rm_watch() 
inotify_rm_wd() Τ줫ƤФˡʾ inotify_add_watch() ƤǤ
ޤ

#To determine if you have already registered a watch for a given inode, you may
#call inotify_find_watch(), which gives you both the wd and the watch pointer for
#the inotify_watch, or an error if the watch does not exist.
 inode Фƴ watch ϿƤ뤫ǧˤϡ
inotify_find_watch() ƤǤˤ inotify_watch  wd  watch
ݥ󥿤ξ졢watch ¸ߤʤʤ饨顼ˤʤޤ

    wd = inotify_find_watch(ih, inode, &watchp);

#You may use container_of() on the watch pointer to access your own data
#associated with a given watch.  When an existing watch is found,
#inotify_find_watch() bumps the refcount before releasing its locks.  You must
#put that reference with:
watch ݥ󥿤 container_of() Ȥäơ watch ˴ϢŤƤ뼫
Υǡ˥ԤʤȤǤޤ¸ watch ¸ߤ硢
inotify_find_watch() ϥå refcount á (+1) ޤλ
ϰʲΤ褦ˤʤФޤ

    put_inotify_watch(watchp);

#Call inotify_find_update_watch() to update the event mask for an existing watch.
#inotify_find_update_watch() returns the wd of the updated watch, or an error if
#the watch does not exist.
¸ watch Υ٥ȥޥ򹹿ˤ inotify_find_update_watch() 
Ǥinotify_find_update_watch() Ϲ줿 watch  wd ֤
watch ¸ߤʤˤϥ顼֤ޤ

    wd = inotify_find_update_watch(ih, inode, mask);

#An existing watch may be removed by calling either inotify_rm_watch() or
#inotify_rm_wd().
¸ watch  inotify_rm_watch()  inotify_rm_wd() Ƥ֤Ȥˤ
ޤ

    int ret = inotify_rm_watch(ih, &my_watch->iwatch);
    int ret = inotify_rm_wd(ih, wd);

#A watch may be removed while executing your event handler with the following:
٥ȥϥɥ椫 watch ʲΤ褦˺뤳ȤǤޤ

    inotify_remove_watch_locked(ih, iwatch);

#Call inotify_destroy() to remove all watches from your inotify instance and
#release it.  If there are no outstanding references, inotify_destroy() will call
#your destroy_watch op for each watch.
inotify 󥹥󥹤餹٤Ƥ watch ˤ inotify_destroy()
ƤӤޤĤäȤʤˤϡinotify_destroy() Ϥʤκ
destory_watch  watch ˤĤƤӽФޤ

    inotify_destroy(ih);

#When inotify removes a watch, it sends an IN_IGNORED event to your callback.
#You may use this event as an indication to free the watch memory.  Note that
#inotify may remove a watch due to filesystem events, as well as by your request.
#If you use IN_ONESHOT, inotify will remove the watch after the first event, at
#which point you may call the final inotify_put_watch.
inotify  watch ݤˤϡƤӸ IN_IGNORED ٥Ȥޤ
Υ٥Ȥ watch β򼨤ΤȤƻȤȤǤޤ
inotify Ϥʤ׵ʳˡե륷ƥ।٥Ȥ watch 
⤢ޤIN_ONESHOT Ȥ硢inotify ϺǽΥ٥Ȥ watch 
ޤλǺǸ inotify_put_watch Ƥ֤ȤǤޤ

#(iv) Kernel Interface Prototypes
(iv) ͥ륤󥿡եץȥ

	struct inotify_handle *inotify_init(struct inotify_operations *ops);

	inotify_init_watch(struct inotify_watch *watch);

	s32 inotify_add_watch(struct inotify_handle *ih,
		              struct inotify_watch *watch,
			      struct inode *inode, u32 mask);

	s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
			       struct inotify_watch **watchp);

	s32 inotify_find_update_watch(struct inotify_handle *ih,
				      struct inode *inode, u32 mask);

	int inotify_rm_wd(struct inotify_handle *ih, u32 wd);

	int inotify_rm_watch(struct inotify_handle *ih,
			     struct inotify_watch *watch);

	void inotify_remove_watch_locked(struct inotify_handle *ih,
					 struct inotify_watch *watch);

	void inotify_destroy(struct inotify_handle *ih);

	void get_inotify_watch(struct inotify_watch *watch);
	void put_inotify_watch(struct inotify_watch *watch);


#(v) Internal Kernel Implementation
(v) Υͥ

#Each inotify instance is represented by an inotify_handle structure.
#Inotify's userspace consumers also have an inotify_device which is
#associated with the inotify_handle, and on which events are queued.
 inotify 󥹥󥹤 inotify_handle ¤ΤǼ¸ƤޤInotify
Υ桼֤μ¦ϡinotify_handle ˷ӤĤ줿 inotify_device 
٥ȤϤ˥塼󥰤ޤ

#Each watch is associated with an inotify_watch structure.  Watches are chained
#off of each associated inotify_handle and each associated inode.
 watch  inotify_watch ¤Τ˴ϢŤƤޤwatch ϴϢŤ
줿 inotify_handle ȴϢŤ줿 inode ӤĤΤǤ

#See fs/inotify.c and fs/inotify_user.c for the locking and lifetime rules.
å¸֤˴ؤ뵬§ˤĤƤ  fs/inotify.c  fs/inotify_user.c 
Ȥ


#(vi) Rationale
(vi) 

#Q: What is the design decision behind not tying the watch to the open fd of
#   the watched object?
Q; ƻоݤΥ֥ȤΡץ󤵤Ƥեǥץƻ뤷
   ʤȤ߷ȽǤͳ򶵤Ƥ

#A: Watches are associated with an open inotify device, not an open file.
#   This solves the primary problem with dnotify: keeping the file open pins
#   the file and thus, worse, pins the mount.  Dnotify is therefore infeasible
#   for use on a desktop system with removable media as the media cannot be
#   unmounted.  Watching a file should not require that it be open.
A:  watch ϥץ󤵤줿 inotify ǥХ˷ӤĤƤꡢץ
    줿եǤϤޤ󡣤ˤä dnotify κ (ե
    򥪡ץ³뤳Ȥˤäƥեꡢ˥ޥȤꤷƤ
    ޤȸ) 褵ޤˤꡢdnotify ϥࡼХ֥
    ǥĥǥȥåץƥˤϸƤޤǤǥ
    ޥȤǤȴʤʤäƤޤǤեδƻ롢Ĥޤ
    watch ˤϡե򳫤Ȥ׵ᤵƤƤϤʤʤΤǤ

#Q: What is the design decision behind using an-fd-per-instance as opposed to
#   an fd-per-watch?
Q: եǥץ watch ǤϤʤեǥץ˥
   󥹤Ȥ߷ȽǤͳϡ

#A: An fd-per-watch quickly consumes more file descriptors than are allowed,
#   more fd's than are feasible to manage, and more fd's than are optimally
#   select()-able.  Yes, root can bump the per-process fd limit and yes, users
#   can use epoll, but requiring both is a silly and extraneous requirement.
#   A watch consumes less memory than an open file, separating the number
#   spaces is thus sensible.  The current design is what user-space developers
#   want: Users initialize inotify, once, and add n watches, requiring but one
#   fd and no twiddling with fd limits.  Initializing an inotify instance two
#   thousand times is silly.  If we can implement user-space's preferences
#   cleanly--and we can, the idr layer makes stuff like this trivial--then we
#   should.
A:  եǥץ watch ֤ϵեǥ
    ץ (fd) ξ¡Ĥޤ갷䤹ϰϤξ¤ĶƤޤޤ
    select() ǰΤŬĶƤޤޤroot ʤץ
     fd ξ¤Ķޤ桼 epoll ȤȤǤޤǤ
    ξ׵᤹Τϼ׵Ǥ礦watch ϳƤ
    򿩤ޤ󤷡ͳǤĤοʬۤ
    ʤǤ
    ߤμϥ桼֤γȯԤ˾ΡĤޤ桼ϰ٤ inotify
     n Ĥ watch äޤɬפʥեǥ
    ϰĤǡfd ξ¤򵤤ˤɬפϤޤ󡢤Ȥ˾ΤΤμ
    ˤʤäƤޤinotify 󥹥󥹤ΤϼƤ
    ⤷䤿桼֤Ǥ˾˼Ǥʤ顢Ƥϲ
    ǽǡ٤ȤʤΤǤ idr 쥤ˤꤴưפ˼¸Ǥ
    ޤ

#   There are other good arguments.  With a single fd, there is a single
#   item to block on, which is mapped to a single queue of events.  The single
#   fd returns all watch events and also any potential out-of-band data.  If
#   every fd was a separate watch,
    ʳˤɤͳޤĤ fd Ȥä硢֥å
    ݤϤҤȤĤˤʤꡢϥ٥ȤñΥ塼бդޤ
    ñ fd  watch ٥Ȥ֤ޤȥ֥ХɤΥǡ
    ФǼǤޤ⤷ fd ̡ watch бդƤ
    ʤ顢

#   - There would be no way to get event ordering.  Events on file foo and
#     file bar would pop poll() on both fd's, but there would be no way to tell
#     which happened first.  A single queue trivially gives you ordering.  Such
#     ordering is crucial to existing applications such as Beagle.  Imagine
#     "mv a b ; mv b a" events without ordering.
    - ٥Ȥνդˡʤե foo ȥե bar ǡξ
     fd Ф poll() ȯ뤳Ȥޤɤ餬˵ä
    Τˡޤ󡣥塼Ĥʤ̵ϷޤޤΤ褦
    ʽŤ Beagle Τ褦ʴ¸ΥץꥱɬܤˤʤäƤޤ
    㤨С"mv a b ; mv b a" ٥Ȥ˽ʤäѤǤ

#   - We'd have to maintain n fd's and n internal queues with state,
#     versus just one.  It is a lot messier in the kernel.  A single, linear
#     queue is the data structure that makes sense.
    - ĤǤϤʤn Ĥ fd  n Ĥξ֤ä塼ɬפ
    ޤϥͥϤ뤫ˤ㤴ˤޤñΡΥ
    塼ʥǡ¤ʤΤǤ

#   - User-space developers prefer the current API.  The Beagle guys, for
#     example, love it.  Trust me, I asked.  It is not a surprise: Who'd want
#     to manage and block on 1000 fd's via select?
    - 桼֤γȯԤϡ¸ API ˾ߤޤBeagle γȯԤϡ㤨С
    줬繥ǤޤѤƤäϤʤ褦ˤƤޤ
    顣ʤȤʤС줬 1000 Ĥ fd  select Ǵ֥å
    פǤ

#   - No way to get out of band data.
    - ȥ֥ХɤΥǡˡʤ

#   - 1024 is still too low.  ;-)
    - 1024 Ϥޤ ;-)

#   When you talk about designing a file change notification system that
#   scales to 1000s of directories, juggling 1000s of fd's just does not seem
#   the right interface.  It is too heavy.
    Ȥǥ쥯ȥ˳礹եѹΥƥ߷פǤϡ
     fd 򤪼̤Τ󥿡եǤϤʤǤޤ˽Ť
    ޤ

#   Additionally, it _is_ possible to  more than one instance  and
#   juggle more than one queue and thus more than one associated fd.  There
#   need not be a one-fd-per-process mapping; it is one-fd-per-queue and a
#   process can easily want more than one queue.
    ޤİʾΥ󥹥󥹡İʾΥ塼ƴϢŤ줿
    İʾ fd ĤȤǽǤ̤ˡץ˰Ĥ fd Ȥɬ
    櫓ǤϤޤ󡣰ĤΥ塼˰Ĥ fd ȤȤǤꡢץ
    ưפİʾΥ塼ĤȤǤޤ

#Q: Why the system call approach?
Q: ƥॳȤץäΤϤʤǤ

#A: The poor user-space interface is the second biggest problem with dnotify.
#   Signals are a terrible, terrible interface for file notification.  Or for
#   anything, for that matter.  The ideal solution, from all perspectives, is a
#   file descriptor-based one that allows basic file I/O and poll/select.
#   Obtaining the fd and managing the watches could have been done either via a
#   device file or a family of new system calls.  We decided to implement a
#   family of system calls because that is the preferred approach for new kernel
#   interfaces.  The only real difference was whether we wanted to use open(2)
#   and ioctl(2) or a couple of new system calls.  System calls beat ioctls.
A:  桼ڡ󥿡եϤ dnotify ܤ礭Ǥ
    եΤΤΥʥϡ¤ˡ¤ˤҤɤ󥿡եǤ
    ˤϡ٤ŬƤʤǤ礦̤Ūʲϡ
    ǥץ١Ǵ I/O  poll/select ΤǤ
    fd ꤷ watch ΤϡǥХեȤˡǤ⡢Ϣ
    ƥॳοߤΤǤǽǤϢΥƥॳ
    Τϡͥ륤󥿡եˤϤŬƤ
    Ǥΰ㤤Ȥϡopen(2)  ioctl(2) Ȥƥॳ
    ˤʤΤǤƥॳ ioctl ɤ˷ޤ
    Ƥޤ
