From 8348405ab347c7759631e69ba1370eeac11dadff Mon Sep 17 00:00:00 2001 From: Guillem Jover Date: Mon, 25 Feb 2019 01:56:55 +0000 Subject: [PATCH] Import libaio_0.3.112.orig.tar.xz [dgit import orig libaio_0.3.112.orig.tar.xz] --- .gitignore | 10 + COPYING | 515 +++++++++++++++++++++++++++++++++++++ ChangeLog | 54 ++++ INSTALL | 18 ++ Makefile | 39 +++ README.md | 12 + TODO | 4 + harness/Makefile | 56 ++++ harness/README | 19 ++ harness/attic/0.t | 9 + harness/attic/1.t | 9 + harness/cases/10.t | 53 ++++ harness/cases/11.t | 39 +++ harness/cases/12.t | 57 ++++ harness/cases/13.t | 66 +++++ harness/cases/14.t | 97 +++++++ harness/cases/15.t | 94 +++++++ harness/cases/16.t | 104 ++++++++ harness/cases/17.t | 265 +++++++++++++++++++ harness/cases/18.t | 115 +++++++++ harness/cases/19.t | 253 ++++++++++++++++++ harness/cases/2.t | 41 +++ harness/cases/20.t | 178 +++++++++++++ harness/cases/21.t | 176 +++++++++++++ harness/cases/22.t | 149 +++++++++++ harness/cases/3.t | 25 ++ harness/cases/4.t | 72 ++++++ harness/cases/5.t | 54 ++++ harness/cases/6.t | 57 ++++ harness/cases/7.t | 30 +++ harness/cases/8.t | 30 +++ harness/cases/aio_setup.h | 108 ++++++++ harness/cases/common-7-8.h | 38 +++ harness/ext2-enospc.img | Bin 0 -> 409600 bytes harness/main.c | 40 +++ harness/runtests.sh | 19 ++ libaio.spec | 221 ++++++++++++++++ man/io.3 | 351 +++++++++++++++++++++++++ man/io_cancel.3 | 65 +++++ man/io_fsync.3 | 82 ++++++ man/io_getevents.3 | 134 ++++++++++ man/io_prep_fsync.3 | 89 +++++++ man/io_prep_pread.3 | 79 ++++++ man/io_prep_pwrite.3 | 77 ++++++ man/io_queue_init.3 | 63 +++++ man/io_queue_release.3 | 48 ++++ man/io_queue_run.3 | 50 ++++ man/io_queue_wait.3 | 56 ++++ man/io_set_callback.3 | 44 ++++ man/io_submit.3 | 135 ++++++++++ src/Makefile | 73 ++++++ src/aio_ring.h | 49 ++++ src/compat-0_1.c | 62 +++++ src/io_cancel.c | 23 ++ src/io_destroy.c | 23 ++ src/io_getevents.c | 35 +++ src/io_pgetevents.c | 56 ++++ src/io_queue_init.c | 33 +++ src/io_queue_release.c | 27 ++ 
src/io_queue_run.c | 39 +++ src/io_queue_wait.c | 31 +++ src/io_setup.c | 23 ++ src/io_submit.c | 23 ++ src/libaio.h | 300 +++++++++++++++++++++ src/libaio.map | 27 ++ src/raw_syscall.c | 19 ++ src/syscall-alpha.h | 5 + src/syscall-arm.h | 26 ++ src/syscall-generic.h | 11 + src/syscall-i386.h | 6 + src/syscall-ia64.h | 5 + src/syscall-ppc.h | 5 + src/syscall-s390.h | 5 + src/syscall-sparc.h | 5 + src/syscall-x86_64.h | 6 + src/syscall.h | 73 ++++++ src/vsys_def.h | 24 ++ 77 files changed, 5413 insertions(+) create mode 100644 .gitignore create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 INSTALL create mode 100644 Makefile create mode 100644 README.md create mode 100644 TODO create mode 100644 harness/Makefile create mode 100644 harness/README create mode 100644 harness/attic/0.t create mode 100644 harness/attic/1.t create mode 100644 harness/cases/10.t create mode 100644 harness/cases/11.t create mode 100644 harness/cases/12.t create mode 100644 harness/cases/13.t create mode 100644 harness/cases/14.t create mode 100644 harness/cases/15.t create mode 100644 harness/cases/16.t create mode 100644 harness/cases/17.t create mode 100644 harness/cases/18.t create mode 100644 harness/cases/19.t create mode 100644 harness/cases/2.t create mode 100644 harness/cases/20.t create mode 100644 harness/cases/21.t create mode 100644 harness/cases/22.t create mode 100644 harness/cases/3.t create mode 100644 harness/cases/4.t create mode 100644 harness/cases/5.t create mode 100644 harness/cases/6.t create mode 100644 harness/cases/7.t create mode 100644 harness/cases/8.t create mode 100644 harness/cases/aio_setup.h create mode 100644 harness/cases/common-7-8.h create mode 100644 harness/ext2-enospc.img create mode 100644 harness/main.c create mode 100755 harness/runtests.sh create mode 100644 libaio.spec create mode 100644 man/io.3 create mode 100644 man/io_cancel.3 create mode 100644 man/io_fsync.3 create mode 100644 man/io_getevents.3 create mode 100644 
man/io_prep_fsync.3 create mode 100644 man/io_prep_pread.3 create mode 100644 man/io_prep_pwrite.3 create mode 100644 man/io_queue_init.3 create mode 100644 man/io_queue_release.3 create mode 100644 man/io_queue_run.3 create mode 100644 man/io_queue_wait.3 create mode 100644 man/io_set_callback.3 create mode 100644 man/io_submit.3 create mode 100644 src/Makefile create mode 100644 src/aio_ring.h create mode 100644 src/compat-0_1.c create mode 100644 src/io_cancel.c create mode 100644 src/io_destroy.c create mode 100644 src/io_getevents.c create mode 100644 src/io_pgetevents.c create mode 100644 src/io_queue_init.c create mode 100644 src/io_queue_release.c create mode 100644 src/io_queue_run.c create mode 100644 src/io_queue_wait.c create mode 100644 src/io_setup.c create mode 100644 src/io_submit.c create mode 100644 src/libaio.h create mode 100644 src/libaio.map create mode 100644 src/raw_syscall.c create mode 100644 src/syscall-alpha.h create mode 100644 src/syscall-arm.h create mode 100644 src/syscall-generic.h create mode 100644 src/syscall-i386.h create mode 100644 src/syscall-ia64.h create mode 100644 src/syscall-ppc.h create mode 100644 src/syscall-s390.h create mode 100644 src/syscall-sparc.h create mode 100644 src/syscall-x86_64.h create mode 100644 src/syscall.h create mode 100644 src/vsys_def.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d4a4309 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +*.rej +*.orig +*~ +/*.patch + +*.o +*.o[ls] + +/src/libaio.a +/src/libaio.so* diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..c4792dd --- /dev/null +++ b/COPYING @@ -0,0 +1,515 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. 
If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. +^L + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. 
The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. 
+^L + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. 
You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) 
+ +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. +^L + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. +^L + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. +^L + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. +^L + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. +^L + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS +^L + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
+It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper +mail. + +You should also get your employer (if you work as a programmer) or +your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James +Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..b1536b4 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,54 @@ +0.3.107 + - Make tests compile again on modern systems (warnings + -Werror) + - Add 'make partcheck' and don't require manual setup for testing. + - Change test harness to compile against this dir, not global install + - Fix 5.t for archs where PROT_WRITE mappings are readable.
+ - Allow sending of SIGXFSZ on aio over limits + - Explicitly specify bash for runtests.sh + - Put deprecating comments on never-merged io_prep_poll + - Add io_prep_preadv and io_prep_pwritev + - Add eventfd support (io_set_eventfd). + +0.4.0 + - remove libredhat-kernel + - add rough outline for man pages + - make the compiled io_getevents() add the extra parameter and + pass the timeout for updating as per 2.5 + - fixes for ia64, now works + - fixes for x86-64 + - powerpc support from Gianni Tedesco + - disable the NULL check in harness/cases/4.t on ia64: ia64 + maps the 0 page and causes this check to fail. + +0.3.15 + - use real syscall interface, but don't break source compatibility + yet (that will happen with 0.4.0) + +0.3.13 + - add test cases + +0.3.11 + - use library versioning of libredhat-kernel to always provide a + fallback + +0.3.9 + - add io_queue_release function + +0.3.8 + - make clean deletes libredhat-kernel.so.1 + - const struct timespec * + - add make srpm target + +0.3.7 + - fix assembly function .types + - export io_getevents + - fix io_submit function prototype to match the kernel + - provide /usr/lib/libredhat-kernel.so link for compilation + (do NOT link against libredhat-kernel.so directly) + - fix soname to libaio.so.1 + - fix dummy libredhat-kernel's soname + - work around nfs bug + - provide and install libredhat-kernel.so.1 stub + - Makefile improvements + - make sure dummy libredhat-kernel.so only returns -ENOSYS + diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..29b9077 --- /dev/null +++ b/INSTALL @@ -0,0 +1,18 @@ +To install the library, execute the command: + + make prefix=`pwd`/usr install + +which will install the binaries and header files into the directory +usr. Set prefix=/usr to get them installed into the main system. + +Please note: Do not attempt to install on the system the +"libredhat-kernel.so" file. 
It is a dummy shared library +provided only for the purpose of being able to bootstrap +this facility while running on systems without the correct +libredhat-kernel.so built. The contents of the included +libredhat-kernel.so are only stubs; this library is NOT +functional for anything except the internal purpose of +linking libaio.so against the provided stubs. At runtime, +libaio.so requires a real libredhat-kernel.so library; this +is provided by the Red Hat kernel RPM packages with async +I/O functionality. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c1fb831 --- /dev/null +++ b/Makefile @@ -0,0 +1,39 @@ +NAME=libaio +SPECFILE=$(NAME).spec +VERSION=$(shell awk '/Version:/ { print $$2 }' $(SPECFILE)) +TAG = $(NAME)-$(VERSION) +RPMBUILD=$(shell `which rpmbuild >&/dev/null` && echo "rpmbuild" || echo "rpm") + +prefix=/usr +includedir=$(prefix)/include +libdir=$(prefix)/lib + +default: all + +all: + @$(MAKE) -C src + +install: + @$(MAKE) -C src install prefix=$(DESTDIR)$(prefix) includedir=$(DESTDIR)$(includedir) libdir=$(DESTDIR)$(libdir) + +check: + @$(MAKE) -C harness check + +partcheck: all + @$(MAKE) -C harness partcheck + +clean: + @$(MAKE) -C src clean + @$(MAKE) -C harness clean + +tag-archive: + @git tag $(TAG) + +create-archive: + @git archive --prefix=$(NAME)-$(VERSION)/ -o $(NAME)-$(VERSION).tar.gz $(TAG) + @echo "The final archive is ./$(NAME)-$(VERSION).tar.gz." + +archive: clean tag-archive create-archive + +srpm: create-archive + $(RPMBUILD) --define "_sourcedir `pwd`" --define "_srcrpmdir `pwd`" --nodeps -bs $(SPECFILE) diff --git a/README.md b/README.md new file mode 100644 index 0000000..692f4a7 --- /dev/null +++ b/README.md @@ -0,0 +1,12 @@ +# libaio + +The Linux-native asynchronous I/O facility ("async I/O", or "aio") has a +richer API and capability set than the simple POSIX async I/O facility. +This library, libaio, provides the Linux-native API for async I/O. 
+The POSIX async I/O facility requires this library in order to provide +kernel-accelerated async I/O capabilities, as do applications which +require the Linux-native async I/O API. + +## Contributing + +Patches should be submitted to linux-aio@kvack.org. diff --git a/TODO b/TODO new file mode 100644 index 0000000..0a9ac15 --- /dev/null +++ b/TODO @@ -0,0 +1,4 @@ +- Write man pages. +- Make -static links against libaio work. +- Fallback on userspace if the kernel calls return -ENOSYS. + diff --git a/harness/Makefile b/harness/Makefile new file mode 100644 index 0000000..f477737 --- /dev/null +++ b/harness/Makefile @@ -0,0 +1,56 @@ +# foo. +TEST_SRCS:=$(shell find cases/ -name \*.t | sort -n -t/ -k2) +EXTRAPROGS:=cases/8.p cases/10.p +PARTPROGS:=$(filter-out $(EXTRAPROGS), $(patsubst %.t,%.p,$(TEST_SRCS))) +PROGS:=$(PARTPROGS) $(EXTRAPROGS) +HARNESS_SRCS:=main.c +# io_queue.c + +CFLAGS+=-Wall -Werror -I../src -g -O2 -DPAGE_SIZE=$(shell getconf PAGESIZE) +#-lpthread -lrt + +all: $(PROGS) + +$(PROGS): %.p: %.t $(HARNESS_SRCS) + $(CC) $(CFLAGS) -DTEST_NAME=\"$<\" -o $@ main.c ../src/libaio.a -lpthread + +clean: + rm -f $(PROGS) *.o runtests.out rofile wofile rwfile + +.PHONY: + +testdir/rofile: testdir .PHONY + rm -f $@ + echo "test" >$@ + chmod 400 $@ + +testdir/wofile: testdir .PHONY + rm -f $@ + echo "test" >$@ + chmod 200 $@ + +testdir/rwfile: testdir .PHONY + rm -f $@ + echo "test" >$@ + chmod 600 $@ + +testdir testdir.enospc testdir.ext2: + mkdir $@ + +root: .PHONY + @if [ `id -u` -ne 0 ]; then echo Need root for check, try partcheck >&2; exit 1; fi + +partcheck: $(PARTPROGS) testdir/rofile testdir/rwfile testdir/wofile + ./runtests.sh $(PARTPROGS) + +ext2.img: + dd if=/dev/zero bs=1M count=10 of=$@ + mke2fs -F -b 4096 $@ + +extracheck: $(EXTRAPROGS) root testdir.ext2 testdir.enospc ext2.img + mount -o loop -t ext2 ext2-enospc.img testdir.enospc + ./runtests.sh cases/10.p; ret=$$?; umount testdir.enospc; exit $$ret + mount -o loop -t ext2 ext2.img testdir.ext2 + 
./runtests.sh cases/8.p; ret=$$?; umount testdir.ext2; exit $$ret + +check: partcheck extracheck diff --git a/harness/README b/harness/README new file mode 100644 index 0000000..5557370 --- /dev/null +++ b/harness/README @@ -0,0 +1,19 @@ +Notes on running this test suite: + +To run the test suite, run "make check". All test cases should pass +and there should be 0 fails. + +Several of the test cases require a directory on the filesystem under +test for the creation of test files, as well as the generation of +error conditions. The test cases assume the directories (or symlinks +to directories) are as follows: + + testdir/ + - used for general read/write test cases. Must have at + least as much free space as the machine has RAM (up + to 768MB). + testdir.enospc/ + - a filesystem that has space for writing 8KB out, but + fails with -ENOSPC beyond 8KB. + testdir.ext2/ + - must be an ext2 filesystem. diff --git a/harness/attic/0.t b/harness/attic/0.t new file mode 100644 index 0000000..033e62c --- /dev/null +++ b/harness/attic/0.t @@ -0,0 +1,9 @@ +/* 0.t + Test harness check: okay. +*/ +int test_main(void) +{ + printf("test_main: okay\n"); + return 0; +} + diff --git a/harness/attic/1.t b/harness/attic/1.t new file mode 100644 index 0000000..799ffd1 --- /dev/null +++ b/harness/attic/1.t @@ -0,0 +1,9 @@ +/* 1.t + Test harness check: fail. +*/ +int test_main(void) +{ + printf("test_main: fail\n"); + return 1; +} + diff --git a/harness/cases/10.t b/harness/cases/10.t new file mode 100644 index 0000000..9d3beb2 --- /dev/null +++ b/harness/cases/10.t @@ -0,0 +1,53 @@ +/* 10.t - uses testdir.enospc/rwfile +- Check results on out-of-space and out-of-quota. 
(10.t) + - write that fills filesystem but does not go over should succeed + - write that fills filesystem and goes over should be partial + - write to full filesystem should return -ENOSPC + - read beyond end of file after ENOSPC should return 0 +*/ +#include "aio_setup.h" + +#include +#include +#include + +int test_main(void) +{ +/* Note: changing either of these requires updating the ext2-enospc.img + * filesystem image. Also, if SIZE is less than PAGE_SIZE, problems + * crop up due to ext2's preallocation. + */ +#define LIMIT 65536 +#define SIZE 65536 + char *buf; + int rwfd; + int status = 0, res; + + rwfd = open("testdir.enospc/rwfile", O_RDWR|O_CREAT|O_TRUNC, 0600); + assert(rwfd != -1); + res = ftruncate(rwfd, 0); assert(res == 0); + buf = malloc(SIZE); assert(buf != NULL); + memset(buf, 0, SIZE); + + + status |= attempt_rw(rwfd, buf, SIZE, LIMIT-SIZE, WRITE, SIZE); + status |= attempt_rw(rwfd, buf, SIZE, LIMIT-SIZE, READ, SIZE); + + status |= attempt_rw(rwfd, buf, SIZE, LIMIT, WRITE, -ENOSPC); + status |= attempt_rw(rwfd, buf, SIZE, LIMIT, READ, 0); + + res = ftruncate(rwfd, 0); assert(res == 0); + + status |= attempt_rw(rwfd, buf, SIZE, 1+LIMIT-SIZE, WRITE, SIZE-1); + status |= attempt_rw(rwfd, buf, SIZE, 1+LIMIT-SIZE, READ, SIZE-1); + status |= attempt_rw(rwfd, buf, SIZE, LIMIT, READ, 0); + + status |= attempt_rw(rwfd, buf, SIZE, LIMIT, WRITE, -ENOSPC); + status |= attempt_rw(rwfd, buf, SIZE, LIMIT, READ, 0); + status |= attempt_rw(rwfd, buf, 0, LIMIT, WRITE, 0); + + res = close(rwfd); assert(res == 0); + res = unlink("testdir.enospc/rwfile"); assert(res == 0); + return status; +} + diff --git a/harness/cases/11.t b/harness/cases/11.t new file mode 100644 index 0000000..efcf6d4 --- /dev/null +++ b/harness/cases/11.t @@ -0,0 +1,39 @@ +/* 11.t - uses testdir/rwfile +- repeated read / write of same page (to check accounting) (11.t) +*/ +#include "aio_setup.h" + +#include +#include +#include + +int test_main(void) +{ +#define COUNT 1000000 +#define SIZE 256 
+ char *buf; + int rwfd; + int status = 0; + int i; + + rwfd = open("testdir/rwfile", O_RDWR|O_CREAT|O_TRUNC, 0600); + assert(rwfd != -1); + buf = malloc(SIZE); assert(buf != NULL); + memset(buf, 0, SIZE); + + for (i=0; i +#include +#include +#include + +#include "aio_setup.h" + +void test_child(void) +{ + int res; + res = attempt_io_submit(io_ctx, 0, NULL, -EINVAL); + fflush(stdout); + _exit(res); +} + +int test_main(void) +{ + int res, status; + pid_t pid; + sigset_t set; + + if (attempt_io_submit(io_ctx, 0, NULL, 0)) + return 1; + + sigemptyset(&set); + sigaddset(&set, SIGCHLD); + sigprocmask(SIG_BLOCK, &set, NULL); + + fflush(NULL); + pid = fork(); assert(pid != -1); + + if (pid == 0) + test_child(); + + res = waitpid(pid, &status, 0); + if (res < 0) { + printf("waitpid error\n"); + return res; + } + + if (WIFEXITED(status)) { + int failed = (WEXITSTATUS(status) != 0); + printf("child exited with status %d%s\n", WEXITSTATUS(status), + failed ? " -- FAILED" : ""); + return failed; + } + + /* anything else: failed */ + if (WIFSIGNALED(status)) + printf("child killed by signal %d -- FAILED.\n", + WTERMSIG(status)); + + return 1; +} diff --git a/harness/cases/13.t b/harness/cases/13.t new file mode 100644 index 0000000..5f18005 --- /dev/null +++ b/harness/cases/13.t @@ -0,0 +1,66 @@ +/* 13.t - uses testdir/rwfile +- Submit multiple writes larger than aio-max-size (deadlocks on older + aio code) +*/ +#include "aio_setup.h" + +#include +#include +#include + +int test_main(void) +{ +#define SIZE (1024 * 1024) +#define IOS 8 + struct iocb iocbs[IOS]; + struct iocb *iocb_list[IOS]; + char *bufs[IOS]; + int rwfd; + int status = 0, res; + int i; + + rwfd = open("testdir/rwfile", O_RDWR|O_CREAT|O_TRUNC, 0600); + assert(rwfd != -1); + res = ftruncate(rwfd, 0); assert(res == 0); + + for (i=0; i +#include +#include +#include + +#include "aio_setup.h" +#include + +#define SIZE 768*1024*1024 + +//just submit an I/O + +int test_child(void) +{ + char *buf; + int rwfd; + int res; 
+ long size; + struct iocb iocb; + struct iocb *iocbs[] = { &iocb }; + int loop = 10; + int i; + + aio_setup(1024); + + size = SIZE; + + printf("size = %ld\n", size); + + rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != +-1); + res = ftruncate(rwfd, 0); assert(res == 0); + buf = malloc(size); assert(buf != +NULL); + + for(i=0;i +#include +#include + +int test_main(void) +{ +#define SIZE 512 +#define NUM_IOV 10 + char buf[SIZE*NUM_IOV]; + struct iovec iov[NUM_IOV]; + int rwfd; + int status = 0, res, i; + + rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != -1); + res = ftruncate(rwfd, sizeof(buf)); assert(res == 0); + + for (i = 0; i < NUM_IOV; i++) { + iov[i].iov_base = buf + i*SIZE; + iov[i].iov_len = SIZE; + memset(iov[i].iov_base, i, SIZE); + } + status |= attempt_rw(rwfd, iov, NUM_IOV, 0, WRITEV, SIZE*NUM_IOV); + res = pread(rwfd, buf, sizeof(buf), 0); assert(res == sizeof(buf)); + for (i = 0; i < NUM_IOV; i++) { + unsigned int j; + for (j = 0; j < SIZE; j++) { + if (buf[i*SIZE + j] != i) { + printf("Unexpected value after writev at %i\n", + i*SIZE + j); + status |= 1; + break; + } + } + } + if (!status) + printf("Checking memory: [Success]\n"); + + memset(buf, 0, sizeof(buf)); + status |= attempt_rw(rwfd, iov, NUM_IOV, 0, READV, SIZE*NUM_IOV); + for (i = 0; i < NUM_IOV; i++) { + unsigned int j; + for (j = 0; j < SIZE; j++) { + if (buf[i*SIZE + j] != i) { + printf("Unexpected value after readv at %i\n", + i*SIZE + j); + status |= 1; + break; + } + } + } + + /* Check that offset works. 
*/ + status |= attempt_rw(rwfd, iov+1, NUM_IOV-1, SIZE, WRITEV, + SIZE*(NUM_IOV-1)); + memset(buf, 0, sizeof(buf)); + res = pread(rwfd, buf, sizeof(buf), 0); assert(res == sizeof(buf)); + for (i = 1; i < NUM_IOV; i++) { + unsigned int j; + for (j = 0; j < SIZE; j++) { + if (buf[i*SIZE + j] != i) { + printf("Unexpected value after offset writev at %i\n", + i*SIZE + j); + status |= 1; + break; + } + } + } + if (!status) + printf("Checking memory: [Success]\n"); + + memset(buf, 0, sizeof(buf)); + status |= attempt_rw(rwfd, iov+1, NUM_IOV-1, SIZE, READV, + SIZE*(NUM_IOV-1)); + for (i = 1; i < NUM_IOV; i++) { + unsigned int j; + for (j = 0; j < SIZE; j++) { + if (buf[i*SIZE + j] != i) { + printf("Unexpected value after offset readv at %i\n", + i*SIZE + j); + status |= 1; + break; + } + } + } + if (!status) + printf("Checking memory: [Success]\n"); + + return status; +} + diff --git a/harness/cases/16.t b/harness/cases/16.t new file mode 100644 index 0000000..5a546ff --- /dev/null +++ b/harness/cases/16.t @@ -0,0 +1,104 @@ +/* 16.t +- eventfd tests. +*/ +#include +#include +#include /* For SYS_xxx definitions */ + +#ifndef SYS_eventfd +#if defined(__i386__) +#define SYS_eventfd 323 +#elif defined(__x86_64__) +#define SYS_eventfd 284 +#elif defined(__ia64__) +#define SYS_eventfd 1309 +#elif defined(__PPC__) +#define SYS_eventfd 307 +#elif defined(__s390__) +#define SYS_eventfd 318 +#elif defined(__alpha__) +#define SYS_eventfd 478 +#elif defined(__aarch64__) +/* arm64 does not implement eventfd, only eventfd2 */ +#define USE_EVENTFD2 +#ifndef SYS_eventfd2 +#define SYS_eventfd2 19 +#endif /* __aarch64__ */ +#else +#error define SYS_eventfd for your arch! +#endif +#endif + +int test_main(void) +{ + /* 10 MB takes long enough that we would fail if eventfd + * returned immediately. 
*/ +#define SIZE 10000000 + char *buf; + struct io_event io_event; + struct iocb iocb; + struct iocb *iocbs[] = { &iocb }; + int rwfd, efd; + int res; + io_context_t io_ctx; + uint64_t event; + struct timespec notime = { .tv_sec = 0, .tv_nsec = 0 }; + + buf = malloc(SIZE); assert(buf); +#ifndef USE_EVENTFD2 + efd = syscall(SYS_eventfd, 0); +#else + efd = syscall(SYS_eventfd2, 0, 0); +#endif + if (efd < 0) { + if (errno == ENOSYS) { + printf("No eventfd support. [SKIPPING]\n"); + exit(0); + } + err(1, "Failed to get eventfd"); + } + + rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != -1); + res = ftruncate(rwfd, 0); assert(res == 0); + memset(buf, 0x42, SIZE); + + /* Write test. */ + res = io_queue_init(1024, &io_ctx); assert(res == 0); + io_prep_pwrite(&iocb, rwfd, buf, SIZE, 0); + io_set_eventfd(&iocb, efd); + res = io_submit(io_ctx, 1, iocbs); assert(res == 1); + + alarm(30); + res = read(efd, &event, sizeof(event)); assert(res == sizeof(event)); + assert(event == 1); + + /* This should now be ready. */ + res = io_getevents(io_ctx, 0, 1, &io_event, &notime); + if (res != 1) + err(1, "io_getevents did not return 1 event after eventfd"); + assert(io_event.res == SIZE); + printf("eventfd write test [SUCCESS]\n"); + + /* Read test. */ + memset(buf, 0, SIZE); + io_prep_pread(&iocb, rwfd, buf, SIZE, 0); + io_set_eventfd(&iocb, efd); + res = io_submit(io_ctx, 1, iocbs); assert(res == 1); + + alarm(30); + res = read(efd, &event, sizeof(event)); assert(res == sizeof(event)); + assert(event == 1); + + /* This should now be ready. 
*/ + res = io_getevents(io_ctx, 0, 1, &io_event, &notime); + if (res != 1) + err(1, "io_getevents did not return 1 event after eventfd"); + assert(io_event.res == SIZE); + + for (res = 0; res < SIZE; res++) + assert(buf[res] == 0x42); + printf("eventfd read test [SUCCESS]\n"); + + return 0; +} + diff --git a/harness/cases/17.t b/harness/cases/17.t new file mode 100644 index 0000000..38ada4d --- /dev/null +++ b/harness/cases/17.t @@ -0,0 +1,265 @@ +/* + * Copyright (C) 2014, Dan Aloni, Kernelim Ltd. + * Copyright (C) 2014, Benjamin LaHaise . + * Copyright (C) 2014, Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Description: + * This regression test ensures that submitting more events than can + * fit in the completion ring will not result in a hung task. 
+ */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const int max_events = 32; +const int io_size = 0x1000; +struct iocb *io; +struct iocb **iops; +struct iovec *iovecs; +struct io_event *events; +char *data; + +long submitted = 0; +long completed = 0; +long pending = 0; + +#define SYS_IO_GETEVENTS 0 +#define USER_GETEVENTS 1 + +static volatile sig_atomic_t done = 0; + +struct aio_ring { + unsigned id; /* kernel internal index number */ + unsigned nr; /* number of io_events */ + volatile unsigned head; + volatile unsigned tail; + + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; /* size of aio_ring */ + + struct io_event io_events[0]; +}; + +int get_ring_size(int nr_events) +{ + io_context_t ctx; + int ret, ring_size; + struct aio_ring *ring; + + memset(&ctx, 0, sizeof(ctx)); + ret = io_setup(nr_events, &ctx); + assert(!ret); + + ring = (void *)ctx; + ring_size = ring->nr; + + ret = io_destroy(ctx); + assert(!ret); + + return ring_size; +} + +int user_getevents(io_context_t ctx, int nr_events, struct io_event *event) +{ + struct aio_ring *ring = (void *)ctx; + int completed = 0; + while ((completed < nr_events) && (ring->head != ring->tail)) { + unsigned new_head = ring->head; + *event = ring->io_events[new_head]; + new_head += 1; + new_head %= ring->nr; + ring->head = new_head; + completed++; + } + return completed; +} + +void prune(io_context_t io_ctx, int max_ios, int getevents_type) +{ + int ret; + + if (getevents_type == USER_GETEVENTS) + ret = user_getevents(io_ctx, max_ios, events); + else + ret = io_getevents(io_ctx, pending, max_ios, events, NULL); + if (ret > 0) { + printf("Completed: %d\n", ret); + completed += ret; + pending -= ret; + } +} + +void run_test(int max_ios, int getevents_type) +{ + int fd, ret; + long i, to_submit; + struct 
iocb **iocb_sub; + io_context_t io_ctx; + const char *filename = "testfile"; + + printf("MAX_IOS: %d, %s\n", max_ios, getevents_type == USER_GETEVENTS ? + "USER_GETEVENTS" : "IO_GETEVENTS"); + memset(&io_ctx, 0, sizeof(io_ctx)); + ret = io_setup(max_events, &io_ctx); + assert(!ret); + + io = calloc(max_ios, sizeof(*io)); + iops = calloc(max_ios, sizeof(*iops)); + iovecs = calloc(max_ios, sizeof(*iovecs)); + events = calloc(max_ios, sizeof(*events)); + + unlink(filename); + fd = open(filename, O_CREAT | O_RDWR | O_DIRECT, 0644); + assert(fd >= 0); + + ret = ftruncate(fd, max_ios * io_size); + assert(!ret); + + data = mmap(NULL, io_size * max_ios, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + assert(data != MAP_FAILED); + + for (i = 0; i < max_ios; i++) { + iops[i] = &io[i]; + io[i].data = io; + iovecs[i].iov_base = &data[io_size * i]; + iovecs[i].iov_len = io_size; + io_prep_preadv(&io[i], fd, &iovecs[i], 1, 0); + } + + submitted = completed = pending = 0; + + to_submit = max_ios; + iocb_sub = iops; + + while (submitted < max_ios) { + printf("Submitting: %ld\n", to_submit); + + ret = io_submit(io_ctx, to_submit, iocb_sub); + if (ret >= 0) { + printf("Submitted: %d\n", ret); + submitted += ret; + iocb_sub += ret; + pending += ret; + to_submit -= ret; + } else { + if (ret == -EAGAIN) { + printf("Submitted too much, that's okay\n"); + prune(io_ctx, max_ios, getevents_type); + } + } + } + + prune(io_ctx, max_ios, getevents_type); + io_destroy(io_ctx); + close(fd); + ret = munmap(data, io_size * max_ios); + assert(!ret); + + printf("Verifying...\n"); + + assert(completed == submitted); + + printf("OK\n"); +} + +void run_child(void) +{ + int ring_size; + + ring_size = get_ring_size(max_events); + + printf("aio ring size: %d\n", ring_size); + + run_test(ring_size-1, SYS_IO_GETEVENTS); + run_test(ring_size, SYS_IO_GETEVENTS); + run_test(ring_size+1, SYS_IO_GETEVENTS); + run_test(ring_size*2, SYS_IO_GETEVENTS); + run_test(ring_size*4, SYS_IO_GETEVENTS); + 
+ run_test(ring_size-1, USER_GETEVENTS); + run_test(ring_size, USER_GETEVENTS); + run_test(ring_size+1, USER_GETEVENTS); + run_test(ring_size*2, USER_GETEVENTS); + run_test(ring_size*4, USER_GETEVENTS); + + exit(0); +} + +void sighandler(int signo) +{ + assert(signo == SIGCHLD); + done = 1; +} + +int test_main(void) +{ + unsigned int ret; + sighandler_t oldhandler; + pid_t child; + + switch (child = fork()) { + case 0: /* child */ + run_child(); + break; + case -1: + perror("fork"); + exit(1); + default: + oldhandler = signal(SIGCHLD, sighandler); + assert(oldhandler != SIG_ERR); + break; + } + + ret = sleep(10); + if (ret != 0) { + pid_t pid; + int status; + + assert(done); + + pid = wait(&status); + if (pid != child) { + perror("wait"); + exit(1); + } + + return WEXITSTATUS(status); + } + + return 1; /* failed */ +} +/* + * Local Variables: + * mode: c + * c-basic-offset: 8 + * End: + */ diff --git a/harness/cases/18.t b/harness/cases/18.t new file mode 100644 index 0000000..5587ceb --- /dev/null +++ b/harness/cases/18.t @@ -0,0 +1,115 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* + * Author: Anatol Pomozov + * + * Description: This code tests to make sure that when io_destroy + * returns, all outstanding I/Os have been completed. It does this by + * issuing one I/O and then calling io_destroy (without calling + * io_getevents). After the call to io_destroy, the buffer is checked + * to ensure that the data was retrieved. This is done simultaneously + * from 100 threads. 
+ */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FILENAME "tempfile" +#define FILEPATTERN '1' +#define DESTROY_PATTERN '2' + +#define THREADS_NUM 100 + +void +aio_worker(void *ptr) +{ + int i, j, fd; + char buffer[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); + + fd = open(FILENAME, O_DIRECT|O_RDONLY); + assert(fd >= 0); + + for (i = 0; i < 1000; i++) { + io_context_t ctx; + struct iocb cb; + struct iocb *cbs[1]; + + assert(!io_queue_init(1, &ctx)); + io_prep_pread(&cb, fd, buffer, PAGE_SIZE, 0); + cbs[0] = &cb; + + memset(buffer, '0', PAGE_SIZE); + assert(io_submit(ctx, 1, &cbs[0]) == 1); + // wait random time (0-500ms) ? + + io_destroy(ctx); + memset(buffer, DESTROY_PATTERN, PAGE_SIZE); + // wait random for (0-500ms) ? + + // check it is still DESTROY_PATTERN + for (j = 0; j < PAGE_SIZE; j++) { + if (buffer[j] != DESTROY_PATTERN) { + fprintf(stderr, + "Buffer has unexpected character: %c\n", + buffer[j]); + exit(EXIT_FAILURE); + } + } + } + + close(fd); +} + +int +test_main(void) +{ + int i, fd, ret; + char buffer[PAGE_SIZE]; + pthread_t threads[THREADS_NUM]; + + fd = open(FILENAME, O_CREAT|O_TRUNC|O_APPEND|O_RDWR, S_IRUSR|S_IWUSR); + assert(fd != -1); + + memset(buffer, FILEPATTERN, PAGE_SIZE); + ret = write(fd, buffer, PAGE_SIZE); + assert(ret == PAGE_SIZE); + close(fd); + + for (i = 0; i < THREADS_NUM; i++) { + ret = pthread_create(&threads[i], NULL, + (void *)&aio_worker, NULL); + assert(ret == 0); + } + for (i = 0; i < THREADS_NUM; i++) { + ret = pthread_join(threads[i], NULL); + assert(ret == 0); + } + + return EXIT_SUCCESS; +} +/* + * Local Variables: + * mode: c + * c-basic-offset: 8 + * End: + */ diff --git a/harness/cases/19.t b/harness/cases/19.t new file mode 100644 index 0000000..4989510 --- /dev/null +++ b/harness/cases/19.t @@ -0,0 +1,253 @@ +/* + * Copyright 2015, Red Hat, Inc. 
+ * + * This test remaps the aio ring buffer and ensures that I/O completions + * can still be reaped from userspace. + * + * Author: Jeff Moyer + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#define BUFLEN 4096 +#define TEMPLATE "19.XXXXXX" + +volatile sig_atomic_t timed_out = 0; + +struct aio_ring { + unsigned id; /* kernel internal index number */ + unsigned nr; /* number of io_events */ + volatile unsigned head; + volatile unsigned tail; + + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; /* size of aio_ring */ + + struct io_event io_events[0]; +}; + +int +open_temp_file(void) +{ + int fd; + char template[sizeof(TEMPLATE)]; + + strncpy(template, TEMPLATE, sizeof(TEMPLATE)); + fd = mkostemp(template, O_DIRECT); + if (fd < 0) { + perror("mkstemp"); + exit(1); + } + unlink(template); + return fd; +} + +/* + * mmap will do the address space search for us. when remapping the ring, + * the use of MREMAP_FIXED will cause this mapping to be unmapped. + * + * len - length in bytes + * + * Returns the available virtual address, or MAP_FAILED on error. 
+ */ +void * +find_unused_va(size_t len) +{ + return mmap(0, len, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); +} + +void +alarm_handler(int __attribute__((unused))signo) +{ + timed_out = 1; +} + +int +user_getevents(io_context_t ctx, int nr_events, struct io_event *event) +{ + struct aio_ring *ring = (void *)ctx; + int completed = 0; + + timed_out = 0; + signal(SIGALRM, alarm_handler); + alarm(30); + + while ((completed < nr_events) && !timed_out) { + unsigned new_head; + + if (ring->head == ring->tail) { + sched_yield(); + continue; + } + + new_head = ring->head; + *event = ring->io_events[new_head]; + new_head += 1; + new_head %= ring->nr; + ring->head = new_head; + completed++; + } + + alarm(0); + signal(SIGALRM, SIG_DFL); + + return completed; +} + +struct aio_ring * +remap_ring(struct aio_ring *ring) +{ + struct aio_ring *new_ring; + size_t ring_size; + + /* + * No need to round up to page size as ring->nr was adjusted + * already to fill the last page in the ring. + */ + ring_size = sizeof(struct aio_ring) + ring->nr * sizeof(struct io_event); + + /* + * Remap the ring. 
+ */ + new_ring = find_unused_va(ring_size); + if (new_ring == MAP_FAILED) { + fprintf(stderr, "Unable to find suitable va for ring\n"); + return NULL; + } + + new_ring = mremap(ring, ring_size, ring_size, + MREMAP_FIXED|MREMAP_MAYMOVE, new_ring); + if (new_ring == MAP_FAILED || new_ring == ring) { + perror("mremap"); + return NULL; + } + + return new_ring; +} + +io_context_t +remap_io_context(io_context_t ctx) +{ + struct aio_ring *ring, *new_ring; + + ring = (void *)ctx; + new_ring = remap_ring(ring); + if (!new_ring) + return NULL; + + ctx = (io_context_t)new_ring; + return ctx; +} + +int +do_io(io_context_t ctx, struct iocb *iocbp, int fd) +{ + int ret; + char buf[BUFLEN]; + + io_prep_pwrite(iocbp, fd, buf, BUFLEN, 0); + ret = io_submit(ctx, 1, &iocbp); + if (ret != 1) { + fprintf(stderr, "io_submit failed with %d\n", ret); + return 1; + } + return 0; +} + +int +check_completion(io_context_t ctx, struct iocb *iocbp) +{ + int ret; + struct io_event event; + + ret = user_getevents(ctx, 1, &event); + if (ret != 1) { + fprintf(stderr, "user_getevents timed out.\n"); + return 1; + } + + if (event.obj != iocbp) { + fprintf(stderr, + "Error: event->opj (%p) does not match iocbp (%p)\n", + event.obj, iocbp); + return 1; + } + + return 0; +} + +int +test_main() +{ + int fd; + int ret; + io_context_t ctx; + struct iocb iocb; + + memset(&ctx, 0, sizeof(ctx)); + fd = open_temp_file(); + + ret = io_setup(1, &ctx); + if (ret != 0) { + fprintf(stderr, "io_setup failed with %d\n", ret); + return 1; + } + + /* + * First, try remapping the ring buffer in-between io_setup and + * io_submit. + */ + ctx = remap_io_context(ctx); + if (ctx == NULL) + return 1; + + ret = do_io(ctx, &iocb, fd); + if (ret != 0) + return 1; + + ret = check_completion(ctx, &iocb); + if (ret != 0) + return 1; + + /* + * Now remap the ring in between io_submit and getevents. 
+ */ + ret = do_io(ctx, &iocb, fd); + if (ret != 0) + return 1; + + ctx = remap_io_context(ctx); + if (ctx == NULL) + return 1; + + ret = check_completion(ctx, &iocb); + if (ret != 0) + return 1; + + /* + * Success, clean up. + */ + ret = io_destroy(ctx); + if (ret != 0) { + fprintf(stderr, "io_destroy failed with %d\n", ret); + return 1; + } + close(fd); + + return 0; +} +/* + * Local variables: + * mode: c + * c-basic-offset: 8 + * End: + */ diff --git a/harness/cases/2.t b/harness/cases/2.t new file mode 100644 index 0000000..3a0212d --- /dev/null +++ b/harness/cases/2.t @@ -0,0 +1,41 @@ +/* 2.t +- io_setup (#2) + - with invalid context pointer + - with maxevents <= 0 + - with an already initialized ctxp +*/ + +int attempt(int n, io_context_t *ctxp, int expect) +{ + int res; + + printf("expect %3d: io_setup(%5d, %p) = ", expect, n, ctxp); + fflush(stdout); + res = io_setup(n, ctxp); + printf("%3d [%s]%s\n", res, strerror(-res), + (res != expect) ? " -- FAILED" : ""); + if (res != expect) + return 1; + + return 0; +} + +int test_main(void) +{ + io_context_t ctx; + int status = 0; + + ctx = NULL; + status |= attempt(-1000, KERNEL_RW_POINTER, -EFAULT); + status |= attempt( 1000, KERNEL_RW_POINTER, -EFAULT); + status |= attempt( 0, KERNEL_RW_POINTER, -EFAULT); + status |= attempt(-1000, &ctx, -EINVAL); + status |= attempt( -1, &ctx, -EINVAL); + status |= attempt( 0, &ctx, -EINVAL); + assert(ctx == NULL); + status |= attempt( 1, &ctx, 0); + status |= attempt( 1, &ctx, -EINVAL); + + return status; +} + diff --git a/harness/cases/20.t b/harness/cases/20.t new file mode 100644 index 0000000..3b444c0 --- /dev/null +++ b/harness/cases/20.t @@ -0,0 +1,178 @@ +/* + * Copyright 2017, Red Hat, Inc. + * Author: Jeff Moyer + * Based on test code from Mauricio Faria de Oliveira + * + * License: GPLv2 + * + * Description: Ensure that aio-max-nr requests can be allocated, or, + * if not, that the reason is not faulty accounting. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#define FAIL 1 + +#define AIO_MAX_NR "/proc/sys/fs/aio-max-nr" +#define AIO_NR "/proc/sys/fs/aio-nr" + +static unsigned aio_max_nr; + +int +read_proc_val(const char *path, unsigned *val) +{ + FILE *fp; + int ret; + + fp = fopen(path, "r"); + if (!fp) { + fprintf(stderr, "Unable to open proc file \"%s\" for reading\n", + path); + return FAIL; + } + + ret = fscanf(fp, "%u\n", val); + fclose(fp); + + if (ret == EOF) { + fprintf(stderr, "Failed to read from proc file \"%s\"\n", + path); + return FAIL; + } + + return 0; +} + +/* + * Create as many ioctx-s with nr_events each as possible (up to aio_max_nr). + * Report any failures of -EAGAIN. + */ +int +do_alloc_ioctxs(int nr_events) +{ + long ret; + unsigned i, avail, aio_nr, nr_ctxs; + io_context_t *ioctx; + + ret = read_proc_val(AIO_NR, &aio_nr); + if (ret) + return FAIL; + + avail = aio_max_nr - aio_nr; + nr_ctxs = avail / nr_events; + ioctx = calloc(nr_ctxs, sizeof(*ioctx)); + if (!ioctx) { + fprintf(stderr, "allocating %u ioctx-s failed with %d\n", + nr_ctxs, errno); + return FAIL; + } + + fprintf(stderr, "Creating %u ioctx-s with %u events each...\n", nr_ctxs, + nr_events); + fflush(stderr); + for (i = 0; i < nr_ctxs; i++) { + ret = io_setup(nr_events, &ioctx[i]); + if (ret) { + /* + * EAGAIN is the only failure case we're interested + * in. -ENOMEM, for example, is expected in this + * test. 
+ */ + if (ret != -EAGAIN) + break; + + fprintf(stderr,"io_setup(%u) failed on iteration %u.\n", + nr_events, i); + fprintf(stderr, "allocated %u of %u possible events.\n", + nr_events * i, aio_max_nr); + ret = read_proc_val(AIO_NR, &aio_nr); + if (ret == 0) + fprintf(stderr, "aio_nr is currently at %u\n", + aio_nr); + + free(ioctx); + return FAIL; + } + } + fprintf(stderr, "Successfully created %u io_context-s\n", i); + if (i < nr_ctxs) /* loop stopped early on a non-EAGAIN error; also safe when nr_ctxs is 0 */ + fprintf(stderr, "Last io_setup call returned %ld (%s)\n", ret, + strerror(-ret)); + fflush(stderr); + + return 0; +} + +/* + * We fork off a child to do the actual work. The reason is that each + * io_destroy will incur an rcu grace period to complete. That really + * adds up, depending on the number of io_contexts created. We take + * advantage of an optimization in the kernel that waits for all + * contexts to be torn down in one grace period. In other words, just + * exiting the process without tearing down the ioctx-s using + * io_destroy is way faster.
+ */ +int +alloc_ioctxs(int nr_events) +{ + pid_t child; + int ret, status; + + child = fork(); + switch (child) { + case 0: /* child */ + ret = do_alloc_ioctxs(nr_events); + exit(ret); + case -1: + fprintf(stderr, "fork() failed with %d\n", errno); + return FAIL; + default: + break; + } + if (waitpid(child, &status, 0) < 0) + return FAIL; + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) + return 0; + + return FAIL; +} + +int +test_main() +{ + int ret; + unsigned nr_events; + + ret = read_proc_val(AIO_MAX_NR, &aio_max_nr); + if (ret) + return FAIL; + + fprintf(stderr, "aio_max_nr: %u\n", aio_max_nr); + + nr_events = 1; + while (1) { + ret = alloc_ioctxs(nr_events); + if (ret) + return FAIL; + + if (nr_events == aio_max_nr) + break; + + nr_events *= 2; + if (nr_events > aio_max_nr) + nr_events = aio_max_nr; + } + + return 0; +} +/* + * Local variables: + * mode: c + * c-basic-offset: 8 + * End: + */ diff --git a/harness/cases/21.t b/harness/cases/21.t new file mode 100644 index 0000000..441eaa8 --- /dev/null +++ b/harness/cases/21.t @@ -0,0 +1,176 @@ +/* + * Copyright 2017, Red Hat, Inc. + * + * Test RWF_NOWAIT. + * + * RWF_NOWAIT will cause -EAGAIN to be returned in the io_event for + * any I/O that cannot be serviced without blocking the submission + * thread. Instances covered by the kernel at the time this test was + * written include: + * - O_DIRECT I/O to a file offset that has populated page cache pages + * - the submission context cannot obtain the inode lock + * - space allocation is necessary + * - we need to wait for other I/O (e.g. in the misaligned I/O case) + * - ... + * + + * The easiest of these to test is that a direct I/O is writing to a + * file offset with populated page cache. We also test to ensure that + * we can perform I/O in the absence of the above conditions. 
+ * + * Author: Jeff Moyer + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#define TEMPLATE "21.XXXXXX" +#define BUFLEN 4096 + +#ifndef RWF_NOWAIT +#define RWF_NOWAIT 0x00000008 +#endif + +int +open_temp_file() +{ + int fd; + char temp_file[sizeof(TEMPLATE)]; + + strncpy(temp_file, TEMPLATE, sizeof(TEMPLATE)); + fd = mkstemp(temp_file); + if (fd < 0) { + perror("mkstemp"); + return -1; + } + unlink(temp_file); + return fd; +} + +int +test_main() +{ + int fd, flags; + int ret; + io_context_t ctx; + struct iocb iocb, *iocbp = &iocb; + struct io_event event; + char buf[BUFLEN] __attribute__((aligned (4096))); + struct iovec iov; + + fd = open_temp_file(); + if (fd < 0) + return 1; + + memset(&ctx, 0, sizeof(ctx)); + ret = io_setup(1, &ctx); + if (ret != 0) { + fprintf(stderr, "io_setup failed with %d\n", ret); + return 1; + } + + /* + * Perform a buffered write to a file. This instantiates the + * block and adds the page to the page cache. + */ + memset(buf, 0xa, BUFLEN); + ret = write(fd, buf, BUFLEN); + if (ret != BUFLEN) { + perror("write"); + return 1; + } + + /* + * Now attempt an aio/dio preadv2 with the RWF_NOWAIT flag + * set. + */ + flags = fcntl(fd, F_GETFL); + ret = fcntl(fd, F_SETFL, flags | O_DIRECT); + if (ret != 0) { + perror("fcntl"); + return 1; + } + + memset(buf, 0, BUFLEN); + iov.iov_base = buf; + iov.iov_len = BUFLEN; + io_prep_preadv2(&iocb, fd, &iov, 1, 0, RWF_NOWAIT); + + ret = io_submit(ctx, 1, &iocbp); + + /* + * io_submit will return -EINVAL if RWF_NOWAIT is not supported.
+ */ + if (ret != 1) { + if (ret == -EINVAL) { + fprintf(stderr, "RWF_NOWAIT not supported by kernel.\n"); + /* just return success */ + return 0; + } + errno = -ret; + perror("io_submit"); + return 1; + } + + ret = io_getevents(ctx, 1, 1, &event, NULL); + if (ret != 1) { + errno = -ret; + perror("io_getevents"); + return 1; + } + + /* + * We expect -EAGAIN due to the existence of a page cache page + * for the file system block we are reading. + */ + if (event.res != -EAGAIN) { + fprintf(stderr, "Expected -EAGAIN, got %ld\n", (long)event.res); /* print signed so a negative errno is readable */ + return 1; + } + + /* + * An O_DIRECT write to the page will force the page out of the + * page cache, allowing the subsequent RWF_NOWAIT I/O to complete. + */ + ret = pwrite(fd, buf, BUFLEN, 0); + if (ret != BUFLEN) { + perror("pwrite"); + return 1; + } + + /* + * Now retry the RWF_NOWAIT I/O. This should succeed. + */ + ret = io_submit(ctx, 1, &iocbp); + if (ret != 1) { + errno = -ret; + perror("io_submit"); + return 1; + } + + ret = io_getevents(ctx, 1, 1, &event, NULL); + if (ret != 1) { + errno = -ret; + perror("io_getevents"); + return 1; + } + + if (event.res != BUFLEN) { + fprintf(stderr, "Expected %d, got %ld\n", BUFLEN, (long)event.res); + return 1; + } + + return 0; +} +/* + * Local variables: + * mode: c + * c-basic-offset: 8 + * End: + */ diff --git a/harness/cases/22.t b/harness/cases/22.t new file mode 100644 index 0000000..c7428a8 --- /dev/null +++ b/harness/cases/22.t @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2006-2018 Free Software Foundation, Inc. + * Copyright (C) 2018 Christoph Hellwig. + * License: LGPLv2.1 or later. + * + * Description: test aio poll and io_pgetevents signal handling. + * + * Very roughly based on glibc tst-pselect.c.
+ */ +#include +#include +#include +#include +#include +#include +#include + +static volatile int handler_called; + +static void +handler(int sig) +{ + handler_called = 1; +} + +int test_main(void) +{ + struct timespec to = { .tv_sec = 0, .tv_nsec = 500000000 }; + pid_t parent = getpid(), p; + int pipe1[2], pipe2[2]; + struct sigaction sa = { .sa_flags = 0 }; + sigset_t sigmask; + struct io_context *ctx = NULL; + struct io_event ev; + struct iocb iocb; + struct iocb *iocbs[] = { &iocb }; + int ret; + + sigemptyset(&sa.sa_mask); + + sa.sa_handler = handler; + if (sigaction(SIGUSR1, &sa, NULL) != 0) { + printf("sigaction(1) failed\n"); + return 1; + } + + sa.sa_handler = SIG_IGN; + if (sigaction(SIGCHLD, &sa, NULL) != 0) { + printf("sigaction(2) failed\n"); + return 1; + } + + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &sigmask, NULL) != 0) { + printf("sigprocmask failed\n"); + return 1; + } + + if (pipe(pipe1) != 0 || pipe(pipe2) != 0) { + printf("pipe failed\n"); + return 1; + } + + sigprocmask(SIG_SETMASK, NULL, &sigmask); + sigdelset(&sigmask, SIGUSR1); + + p = fork(); + switch (p) { + case -1: + printf("fork failed\n"); + exit(2); + case 0: + close(pipe1[1]); + close(pipe2[0]); + + ret = io_setup(1, &ctx); + if (ret) { + printf("child: io_setup failed\n"); + return 1; + } + + io_prep_poll(&iocb, pipe1[0], POLLIN); + ret = io_submit(ctx, 1, iocbs); + if (ret != 1) { + printf("child: io_submit failed\n"); + return 1; + } + + do { + if (getppid() != parent) { + printf("parent died\n"); + exit(2); + } + ret = io_pgetevents(ctx, 1, 1, &ev, &to, &sigmask); + } while (ret == 0); + + if (ret != -EINTR) { + printf("child: io_pgetevents did not set errno to EINTR\n"); + return 1; + } + + do { + errno = 0; + ret = write(pipe2[1], "foo", 3); + } while (ret == -1 && errno == EINTR); + + exit(0); + default: + close(pipe1[0]); + close(pipe2[1]); + + io_prep_poll(&iocb, pipe2[0], POLLIN); + + ret = io_setup(1, &ctx); + if (ret) { + 
printf("parent: io_setup failed\n"); + return 1; + } + + ret = io_submit(ctx, 1, iocbs); + if (ret != 1) { + printf("parent: io_submit failed\n"); + return 1; + } + + kill(p, SIGUSR1); + + ret = io_pgetevents(ctx, 1, 1, &ev, NULL, &sigmask); + if (ret < 0) { + printf("parent: io_pgetevents failed\n"); + return 1; + } + if (ret != 1) { + printf("parent: io_pgetevents did not report event\n"); + return 1; + } + if (ev.obj != &iocb) { + printf("parent: io_pgetevents reports wrong fd\n"); + return 1; + } + if ((ev.res & POLLIN) != POLLIN) { + printf("parent: io_pgetevents did not report readable fd\n"); + return 1; + } + + return 0; + } +} diff --git a/harness/cases/3.t b/harness/cases/3.t new file mode 100644 index 0000000..7773d80 --- /dev/null +++ b/harness/cases/3.t @@ -0,0 +1,25 @@ +/* 3.t +- io_submit/io_getevents with invalid addresses (3.t) + +*/ +#include "aio_setup.h" + +int test_main(void) +{ + struct iocb a, b; + struct iocb *good_ios[] = { &a, &b }; + struct iocb *bad1_ios[] = { NULL, &b }; + struct iocb *bad2_ios[] = { KERNEL_RW_POINTER, &a }; + int status = 0; + + status |= attempt_io_submit(BAD_CTX, 1, good_ios, -EINVAL); + status |= attempt_io_submit( io_ctx, 0, good_ios, 0); + status |= attempt_io_submit( io_ctx, 1, NULL, -EFAULT); + status |= attempt_io_submit( io_ctx, 1, (void *)-1, -EFAULT); + status |= attempt_io_submit( io_ctx, 2, bad1_ios, -EFAULT); + status |= attempt_io_submit( io_ctx, 2, bad2_ios, -EFAULT); + status |= attempt_io_submit( io_ctx, -1, good_ios, -EINVAL); + + return status; +} + diff --git a/harness/cases/4.t b/harness/cases/4.t new file mode 100644 index 0000000..972b4f2 --- /dev/null +++ b/harness/cases/4.t @@ -0,0 +1,72 @@ +/* 4.t +- read of descriptor without read permission (4.t) +- write to descriptor without write permission (4.t) +- check that O_APPEND writes actually append + +*/ +#include "aio_setup.h" + +#define SIZE 512 +#define READ 'r' +#define WRITE 'w' +int attempt(int fd, void *buf, int count, long long pos, int 
rw, int expect) +{ + struct iocb iocb; + int res; + + switch(rw) { + case READ: io_prep_pread (&iocb, fd, buf, count, pos); break; + case WRITE: io_prep_pwrite(&iocb, fd, buf, count, pos); break; + } + + printf("expect %3d: (%c), res = ", expect, rw); + fflush(stdout); + res = sync_submit(&iocb); + printf("%3d [%s]%s\n", res, (res <= 0) ? strerror(-res) : "Success", + (res != expect) ? " -- FAILED" : ""); + if (res != expect) + return 1; + + return 0; +} + +int test_main(void) +{ + char buf[SIZE]; + int rofd, wofd, rwfd; + int status = 0, res; + + memset(buf, 0, SIZE); + + rofd = open("testdir/rofile", O_RDONLY); assert(rofd != -1); + wofd = open("testdir/wofile", O_WRONLY); assert(wofd != -1); + rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != -1); + + status |= attempt(rofd, buf, SIZE, 0, WRITE, -EBADF); + status |= attempt(wofd, buf, SIZE, 0, READ, -EBADF); + status |= attempt(rwfd, buf, SIZE, 0, WRITE, SIZE); + status |= attempt(rwfd, buf, SIZE, 0, READ, SIZE); + status |= attempt(rwfd, buf, SIZE, -1, READ, -EINVAL); + status |= attempt(rwfd, buf, SIZE, -1, WRITE, -EINVAL); + + rwfd = open("testdir/rwfile", O_RDWR|O_APPEND); assert(rwfd != -1); + res = ftruncate(rwfd, 0); assert(res == 0); + status |= attempt(rwfd, buf, SIZE, 0, READ, 0); + status |= attempt(rwfd, "1234", 4, 0, WRITE, 4); + status |= attempt(rwfd, "5678", 4, 0, WRITE, 4); + memset(buf, 0, SIZE); + status |= attempt(rwfd, buf, SIZE, 0, READ, 8); + printf("read after append: [%s]\n", buf); + assert(memcmp(buf, "12345678", 8) == 0); + + status |= attempt(rwfd, KERNEL_RW_POINTER, SIZE, 0, READ, -EFAULT); + status |= attempt(rwfd, KERNEL_RW_POINTER, SIZE, 0, WRITE, -EFAULT); + + /* Some architectures map the 0 page. Ugh. 
*/ +#if !defined(__ia64__) + status |= attempt(rwfd, NULL, SIZE, 0, WRITE, -EFAULT); +#endif + + return status; +} + diff --git a/harness/cases/5.t b/harness/cases/5.t new file mode 100644 index 0000000..2b4b4bb --- /dev/null +++ b/harness/cases/5.t @@ -0,0 +1,54 @@ +/* 5.t +- Write from a mmap() of the same file. (5.t) +*/ +#include "aio_setup.h" +#include +#include + +int test_main(void) +{ + int page_size = getpagesize(); +#define SIZE 512 + char *buf; + int rwfd; + int status = 0, res; + + rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != -1); + res = ftruncate(rwfd, 512); assert(res == 0); + + buf = mmap(0, page_size, PROT_READ|PROT_WRITE, MAP_SHARED, rwfd, 0); + assert(buf != (char *)-1); + + status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE, SIZE); + status |= attempt_rw(rwfd, buf, SIZE, 0, READ, SIZE); + + res = munmap(buf, page_size); assert(res == 0); + buf = mmap(0, page_size, PROT_READ|PROT_WRITE, MAP_SHARED, rwfd, 0); + assert(buf != (char *)-1); + + status |= attempt_rw(rwfd, buf, SIZE, 0, READ, SIZE); + status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE, SIZE); + + res = munmap(buf, page_size); assert(res == 0); + buf = mmap(0, page_size, PROT_READ, MAP_SHARED, rwfd, 0); + assert(buf != (char *)-1); + + status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE, SIZE); + status |= attempt_rw(rwfd, buf, SIZE, 0, READ, -EFAULT); + + res = munmap(buf, page_size); assert(res == 0); + buf = mmap(0, page_size, PROT_WRITE, MAP_SHARED, rwfd, 0); + assert(buf != (char *)-1); + + status |= attempt_rw(rwfd, buf, SIZE, 0, READ, SIZE); + + /* Whether PROT_WRITE is readable is arch-dependent. So compare + * against read result. 
*/ + res = read(rwfd, buf, SIZE); + if (res < 0) + res = -errno; + status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE, res); + + return status; +} + diff --git a/harness/cases/6.t b/harness/cases/6.t new file mode 100644 index 0000000..cea4b01 --- /dev/null +++ b/harness/cases/6.t @@ -0,0 +1,57 @@ +/* 6.t +- huge reads (pinned pages) (6.t) +- huge writes (6.t) +*/ +#include "aio_setup.h" +#include + +long getmemsize(void) +{ + FILE *f = fopen("/proc/meminfo", "r"); + long size; + int gotit = 0; + char str[256]; + + assert(f != NULL); + while (NULL != fgets(str, 255, f)) { + str[255] = 0; + if (0 == memcmp(str, "MemTotal:", 9)) { + if (1 == sscanf(str + 9, "%ld", &size)) { + gotit = 1; + break; + } + } + } + fclose(f); + + assert(gotit != 0); + return size; +} + +int test_main(void) +{ + char *buf; + int rwfd; + int status = 0, res; + long size; + + size = getmemsize(); + printf("size = %ld\n", size); + assert(size >= (16 * 1024)); + if (size > (768 * 1024)) + size = 768 * 1024; + size *= 1024; + + rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != -1); + res = ftruncate(rwfd, 0); assert(res == 0); + buf = malloc(size); assert(buf != NULL); + + //memset(buf, 0, size); + status |= attempt_rw(rwfd, buf, size, 0, WRITE, size); + status |= attempt_rw(rwfd, buf, size, 0, READ, size); + + //res = ftruncate(rwfd, 0); assert(res == 0); + + return status; +} + diff --git a/harness/cases/7.t b/harness/cases/7.t new file mode 100644 index 0000000..f877d8a --- /dev/null +++ b/harness/cases/7.t @@ -0,0 +1,30 @@ +/* 7.t +- Write overlapping the file size rlimit boundary: should be a short + write. (7.t) +- Write at the file size rlimit boundary: should give EFBIG. (I think + the spec requires that you do NOT deliver SIGXFSZ in this case, where + you would do so for sync IO.) (7.t) +- Special case: a write of zero bytes at or beyond the file size rlimit + boundary must return success. 
(7.t) +*/ + +#include +#include + +void SET_RLIMIT(long long limit) +{ + struct rlimit rlim; + int res; + + /* Seems that we do send SIGXFSZ, but hard to fix... */ + signal(SIGXFSZ, SIG_IGN); + rlim.rlim_cur = limit; assert(rlim.rlim_cur == limit); + rlim.rlim_max = limit; assert(rlim.rlim_max == limit); + + res = setrlimit(RLIMIT_FSIZE, &rlim); assert(res == 0); +} + +#define LIMIT 8192 +#define FILENAME "testdir/rwfile" + +#include "common-7-8.h" diff --git a/harness/cases/8.t b/harness/cases/8.t new file mode 100644 index 0000000..dbcf044 --- /dev/null +++ b/harness/cases/8.t @@ -0,0 +1,30 @@ +/* 8.t +- Ditto for the above three tests at the offset maximum (largest + possible ext2/3 file size.) (8.t) + */ +#include +#include + +long long get_fs_limit(int fd) +{ + long long min = 0, max = 9223372036854775807LL; + char c = 0; + int ret; + + while (max - min > 1) { /* bisect; midpoint is min + (max - min) / 2 so the sum cannot overflow */ + if (pwrite64(fd, &c, 1, min + (max - min) / 2) == -1) + max = min + (max - min) / 2; + else { + ret = ftruncate(fd, 0); + assert(ret == 0); + min = min + (max - min) / 2; + } + } + return max; +} + +#define SET_RLIMIT(x) do ; while (0) +#define LIMIT get_fs_limit(rwfd) +#define FILENAME "testdir.ext2/rwfile" + +#include "common-7-8.h" diff --git a/harness/cases/aio_setup.h b/harness/cases/aio_setup.h new file mode 100644 index 0000000..1914915 --- /dev/null +++ b/harness/cases/aio_setup.h @@ -0,0 +1,108 @@ +#include +io_context_t io_ctx; +#define BAD_CTX ((io_context_t)-1) + +void aio_setup(int n) +{ + int res = io_queue_init(n, &io_ctx); + if (res != 0) { + printf("io_queue_init(%d) returned %d (%s)\n", + n, res, strerror(-res)); + exit(3); + } +} + +int attempt_io_submit(io_context_t ctx, long nr, struct iocb *ios[], int expect) +{ + int res; + + printf("expect %3d: io_submit(%10p, %3ld, %10p) = ", expect, ctx, nr, ios); + fflush(stdout); + res = io_submit(ctx, nr, ios); + printf("%3d [%s]%s\n", res, (res <= 0) ? strerror(-res) : "", + (res != expect) ?
" -- FAILED" : ""); + if (res != expect) + return 1; + + return 0; +} + +int sync_submit(struct iocb *iocb) +{ + struct io_event event; + struct iocb *iocbs[] = { iocb }; + int res; + + /* 30 second timeout should be enough */ + struct timespec ts; + ts.tv_sec = 30; + ts.tv_nsec = 0; + + res = io_submit(io_ctx, 1, iocbs); + if (res != 1) { + printf("sync_submit: io_submit res=%d [%s]\n", res, strerror(-res)); + return res; + } + + res = io_getevents(io_ctx, 0, 1, &event, &ts); + if (res != 1) { + printf("sync_submit: io_getevents res=%d [%s]\n", res, strerror(-res)); + return res; + } + return event.res; +} + +#define SETUP aio_setup(1024) + + +#define READ 'r' +#define WRITE 'w' +#define READ_SILENT 'R' +#define WRITE_SILENT 'W' +#define READV '<' +#define WRITEV '>' + +int attempt_rw(int fd, void *buf, int count, long long pos, int rw, int expect) +{ + struct iocb iocb; + int res; + int silent = 0; + + switch(rw) { + case READ_SILENT: + silent = 1; + case READ: + io_prep_pread (&iocb, fd, buf, count, pos); + break; + case WRITE_SILENT: + silent = 1; + case WRITE: + io_prep_pwrite(&iocb, fd, buf, count, pos); + break; + case WRITEV: + io_prep_pwritev(&iocb, fd, buf, count, pos); + break; + case READV: + io_prep_preadv(&iocb, fd, buf, count, pos); + break; + } + + if (!silent) { + printf("expect %5d: (%c), res = ", expect, rw); + fflush(stdout); + } + res = sync_submit(&iocb); + if (!silent || res != expect) { + if (silent) + printf("expect %5d: (%c), res = ", expect, rw); + printf("%5d [%s]%s\n", res, + (res <= 0) ? strerror(-res) : "Success", + (res != expect) ? 
" -- FAILED" : ""); + } + + if (res != expect) + return 1; + + return 0; +} + diff --git a/harness/cases/common-7-8.h b/harness/cases/common-7-8.h new file mode 100644 index 0000000..fc54bbf --- /dev/null +++ b/harness/cases/common-7-8.h @@ -0,0 +1,38 @@ +/* common-7-8.h +*/ +#include "aio_setup.h" + +#include +#include + +#define SIZE 512 + +int test_main(void) +{ + char *buf; + int rwfd; + int status = 0, res; + long long limit; + + rwfd = open(FILENAME, O_RDWR|O_CREAT, 0600); assert(rwfd != -1); + res = ftruncate(rwfd, 0); assert(res == 0); + buf = malloc(SIZE); assert(buf != NULL); + memset(buf, 0, SIZE); + + limit = LIMIT; + + SET_RLIMIT(limit); + + status |= attempt_rw(rwfd, buf, SIZE, limit-SIZE, WRITE, SIZE); + status |= attempt_rw(rwfd, buf, SIZE, limit-SIZE, READ, SIZE); + + status |= attempt_rw(rwfd, buf, SIZE, 1+limit-SIZE, WRITE, SIZE-1); + status |= attempt_rw(rwfd, buf, SIZE, 1+limit-SIZE, READ, SIZE-1); + + status |= attempt_rw(rwfd, buf, SIZE, limit, WRITE, -EFBIG); + status |= attempt_rw(rwfd, buf, SIZE, limit, READ, 0); + status |= attempt_rw(rwfd, buf, 0, limit, WRITE, 0); + + return status; +} + diff --git a/harness/ext2-enospc.img b/harness/ext2-enospc.img new file mode 100644 index 0000000000000000000000000000000000000000..fa567450c61fb4b9da24280d90d16648cd95b8eb GIT binary patch literal 409600 zcmeI*2e4dK0f6E6UMNcMz4zWb1QL4hz4satdhZcJ2Yc_R*u{z+6g&1VcCn!m3pP~Z zzl4mD85x~nkeT;-FY)RG-MDUFj@sRYsYULp1$|vyIQLryE%R3 za$VKnaT5=nGIoQ*r;HtG#eNSCKl->$4x2J|XwRwL`>*HC+42>G6^)S^{C%t4zu9&V zU$finH;*}D$#L6EK4Wm7L4d&jS71z=pQUE4@y-6hd^HFV7##xBrcIl%qmQ&~5Fjv` z1QvaC`j4hPwyy*TJSH&XAOFiH-?i0@W~_TEuBVPaePr+h$fcb}xcjlDZ+FbU{VX!q zQ}JSKg8+e1B`|e-ySv`d(2UQ%!S!^2k)g$Al9;>Yd1~I8uja1>YQb8l7Oq8V@ZYJK zpV!SN9JgKB=U-lf|C5g_p73H19n1_1)2RKWZnr3c<_6G*`P z_ckCfdIik?(R=LeIRW$Edw{^`6)^ut@3FV%1k8W$0Rp2}!2BP*$KIY3F#o*=2#j6< z^MCXndwWj6{P!LpK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5Fqfh6j-HKwQ8+atJfN}X026g*Wi0W)~&uGduaXIpf;?HYUA3Z 
zHm%KS^V*`etihLPkFBk1n;KW+Yunncwyzy($J(iOu3c)^+O2l4J!;R|tM;yaYTw$g z_OAo#z&fZ7u0zToK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBoL-vmzT;V0KAb!wef z6YBJuSd;3EIw>zlE~<;`lDf1mtIO+(y0WgS$@Q$dx~{1yb!}Z& z*VheoW8G9Y*DZBxJ-eP$&#mXx^Xmom!g^7?xL#5(t(Voy>lO9NdR4u;UQ@5F+v;`o z`g%j%UT>^7)tl=r_11b@y}jO1cho!UUG?sIPrbL^S9jL?>jU+{`cQqiK2jg8yXx-x zSber?gV`b>ScK3DhF=j#jg#rjfxxxP|gt*_PB>l^jW`c{3rzEk(rck6rg z{rW-uuzplOuAkKX_0#%U{k(orzpMx9SM}?fTED3W>$mm0`hESO9;!dqpX$%`m-=fx zTz{*-*FWl!npQ)@{i%@}Q?t~pHCxSI&!}hC95rXnRdd%oHE+#V^Vb5kU@cS&*CMrO zEmn)y618M4RZG`0wQMa{%hw9EVy#pw*DAGYtyZho8ntGvRcqHewQj9f>(>UgVQo|! z*Cw@TZC0Dt7PVzxeqCj;f>Um^!wOtK;j0IiW8&ZmgT?=DMW}0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N o0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly@ZSpj6G;@@DF6Tf literal 0 HcmV?d00001 diff --git a/harness/main.c b/harness/main.c new file mode 100644 index 0000000..9ecd5da --- /dev/null +++ b/harness/main.c @@ -0,0 +1,40 @@ +#define _GNU_SOURCE +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#if __LP64__ == 0 +#define KERNEL_RW_POINTER ((void *)0xc0010000) +#else +//#warning Not really sure where kernel memory is. Guessing. +#define KERNEL_RW_POINTER ((void *)0xffffffff81000000) +#endif + + +char test_name[] = TEST_NAME; + +#include TEST_NAME + +int main(void) +{ + int res; + +#if defined(SETUP) + SETUP; +#endif + + res = test_main(); + printf("test %s completed %s.\n", test_name, + res ? "FAILED" : "PASSED" + ); + fflush(stdout); + return res ? 
1 : 0; +} diff --git a/harness/runtests.sh b/harness/runtests.sh new file mode 100755 index 0000000..717c72a --- /dev/null +++ b/harness/runtests.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +passes=0 +fails=0 + +echo "Test run starting at" `date` + +while [ $# -ge 1 ] ; do + this_test=$1 + shift + echo "Starting $this_test" + $this_test 2>&1 + res=$? + if [ $res -eq 0 ] ; then str="" ; passes=$[passes + 1] ; else str=" -- FAILED" ; fails=$[fails + 1] ; fi + echo "Completed $this_test with $res$str". +done + +echo "Pass: $passes Fail: $fails" +echo "Test run complete at" `date` diff --git a/libaio.spec b/libaio.spec new file mode 100644 index 0000000..d1fa2cc --- /dev/null +++ b/libaio.spec @@ -0,0 +1,221 @@ +Name: libaio +Version: 0.3.112 +Release: 1 +Summary: Linux-native asynchronous I/O access library +License: LGPL +Group: System Environment/Libraries +Source: %{name}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-root +# Fix ExclusiveArch as we implement this functionality on more architectures +ExclusiveArch: i386 x86_64 ia64 s390 s390x ppc ppc64 ppc64pseries ppc64iseries alpha alphaev6 %{arm} + +%description +The Linux-native asynchronous I/O facility ("async I/O", or "aio") has a +richer API and capability set than the simple POSIX async I/O facility. +This library, libaio, provides the Linux-native API for async I/O. +The POSIX async I/O facility requires this library in order to provide +kernel-accelerated async I/O capabilities, as do applications which +require the Linux-native async I/O API. + +%package devel +Summary: Development files for Linux-native asynchronous I/O access +Group: Development/System +Requires: libaio +Provides: libaio.so.1 + +%description devel +This package provides header files to include and libraries to link with +for the Linux-native asynchronous I/O facility ("async I/O", or "aio"). 
+ +%prep +%setup + +%build +make + +%install +[ "$RPM_BUILD_ROOT" != "/" ] && rm -rf $RPM_BUILD_ROOT + +make install DESTDIR=$RPM_BUILD_ROOT prefix=/usr libdir=/%{_libdir} + +%clean +[ "$RPM_BUILD_ROOT" != "/" ] && rm -rf $RPM_BUILD_ROOT + +%post -p /sbin/ldconfig + +%postun -p /sbin/ldconfig + +%files +%defattr(-,root,root) +%attr(0755,root,root) %{_libdir}/libaio.so.* +%doc COPYING TODO + +%files devel +%defattr(-,root,root) +%attr(0644,root,root) %{_includedir}/* +%attr(0755,root,root) %{_libdir}/libaio.so +%attr(0644,root,root) %{_libdir}/libaio.a + +%changelog +* Mon Oct 22 2018 Jeff Moyer - 0.3.112-1 +- Add async poll support (Christoph Hellwig) +- Use canonical DESTDIR= environment variable (Thomas Petazzoni) +- Add ability to disable building the shared library (Thomas Petazzoni) + +* Tue Mar 6 2018 Jeff Moyer - 0.3.111-1 +- Add two new tests to the test harness (Jeff Moyer) +- Generic arch dectection for padding defines (Nathan Rossi) +- harness: don't hardcode page size (Jeff Moyer) +- harness: add a test case for mremap (Jeff Moyer) +- libaio: harness: fix build errors due to attribute warn_unused_result (Mauricio Faria de Oliveira) +- libaio: harness: fix build error due to linker search order (Mauricio Faria de Oliveira) +- harness: add test for allocating aio-max-nr ioctxs (Jeff Moyer) +- Add support for preadv2/pwritev2 (Jeff Moyer) +- syscall-generic: don't overwrite errno (Jeff Moyer) +- syscall: get rid of custom syscall implementation (Jeff Moyer) +- Change syscall-arm64.h to syscall-generic.h (Icenowy Zheng) +- Use generic syscall number schema for RISC-V (Icenowy Zheng) +- Add endian detection (LE) and bit width detection (32/64) for RISC-V (Icenowy Zheng) +- Makefile: convert tag and archive targets to git (Jeff Moyer) + +* Fri Jul 5 2013 Jeff Moyer - 0.3.110-1 +- Add suport for sparc and arm64 (Mike Frysinger and Jeff Moyer) +- Add generic syscall fallbacks (Mike Frysinger) +- Update man pages (Jeff Moyer and Cyril Hrubis) +- Build system 
fixes (Mike Frysinger) + +* Tue Jun 9 2009 Jeff Moyer - 0.3.108-1 +- add ARM architecture support (grabbed from Debian arches tree) +- replace check of __i386__ with __LP64__ in test harness (Jeff Moyer) +- change the .spec's copyright tag to license + +* Wed Jan 9 2008 Jeff Moyer - 0.3.107-1 +- Fix the test harness (Rusty Russell) +- Add eventfd support (Rusty Russell) + +* Tue Jan 3 2006 Jeff Moyer - 0.3.106-1 +- Add a .proc directive for the ia64_aio_raw_syscall macro. This sounds a lot + like the previous entry, but that one fixed the __ia64_raw_syscall macro, + located in syscall-ia64.h. This macro is in raw_syscall.c, which pretty much + only exists for ia64. This bug prevented the package from building with + newer version of gcc. + +* Mon Aug 1 2005 Jeff Moyer - 0.3.105-1 +- Add a .proc directive for the ia64 raw syscall macro. + +* Fri Apr 1 2005 Jeff Moyer - 0.3.104-1 +- Add Alpha architecture support. (Sergey Tikhonov ) + +* Tue Jan 25 2005 Jeff Moyer - 0.3.103-1 +- Fix SONAME breakage. In changing file names around, I also changed the + SONAME, which is a no no. + +* Thu Oct 14 2004 Jeff Moyer - 0.3.102-1 +- S390 asm had a bug; I forgot to update the clobber list. Lucky for me, + newer compilers complain about such things. +- Also update the s390 asm to look more like the new kernel variants. + +* Wed Oct 13 2004 Jeff Moyer - 0.3.101-1 +- Revert syscall return values to be -ERRNO. This was an inadvertant bug + introduced when clobber lists changed. +- add ppc64pseries and ppc64iseries to exclusivearch + +* Tue Sep 14 2004 Jeff Moyer - 0.3.100-1 +- Switch around the tests for _PPC_ and _powerpc64_ so that the ppc64 + platforms get the right padding. + +* Wed Jul 14 2004 Jeff Moyer - 0.3.99-4 +- Ok, there was a race in moving the cvs module. Someone rebuild from + the old cvs into fc3. *sigh* bumping rev. + +* Wed Jul 14 2004 Jeff Moyer - 0.3.99-3 +- Actually provide libaio.so.1. 
+ +* Tue Mar 30 2004 Jeff Moyer - 0.3.99-2 +- Apparently the 0.3.93 patch was not meant for 0.3.96. Backed it out. + +* Tue Mar 30 2004 Jeff Moyer - 0.3.99-1 +- Fix compat calls. +- make library .so.1.0.0 and make symlinks properly. +- Fix header file for inclusion in c++ code. + +* Thu Feb 26 2004 Jeff Moyer 0.3.98-2 +- bah. fix version nr in changelog. + +* Thu Feb 26 2004 Jeff Moyer 0.3.98-1 +- fix compiler warnings. + +* Thu Feb 26 2004 Jeff Moyer 0.3.97-2 +- make srpm was using rpm to do a build. changed that to use rpmbuild if + it exists, and fallback to rpm if it doesn't. + +* Tue Feb 24 2004 Jeff Moyer 0.3.97-1 +- Use libc syscall(2) instead of rolling our own calling mechanism. This + change is inspired due to a failure to build with newer gcc, since clobber + lists were wrong. +- Add -fpic to the CFLAGS for all architectures. Should address bz #109457. +- change a #include from to . Fixes a build + issue on s390. + +* Wed Jul 7 2003 Bill Nottingham 0.3.96-3 +- fix paths on lib64 arches + +* Wed Jun 18 2003 Michael K. Johnson 0.3.96-2 +- optimization in io_getevents from Arjan van de Ven in 0.3.96-1 +- deal with ia64 in 0.3.96-2 + +* Wed May 28 2003 Michael K. Johnson 0.3.95-1 +- ppc bugfix from Julie DeWandel + +* Tue May 20 2003 Michael K. Johnson 0.3.94-1 +- symbol versioning fix from Ulrich Drepper + +* Mon Jan 27 2003 Benjamin LaHaise +- bump to 0.3.93-3 for rebuild. + +* Mon Dec 16 2002 Benjamin LaHaise +- libaio 0.3.93 test release +- add powerpc support from Gianni Tedesco +- add s/390 support from Arnd Bergmann + +* Fri Sep 12 2002 Benjamin LaHaise +- libaio 0.3.92 test release +- build on x86-64 + +* Thu Sep 12 2002 Benjamin LaHaise +- libaio 0.3.91 test release +- build on ia64 +- remove libredhat-kernel from the .spec file + +* Thu Sep 5 2002 Benjamin LaHaise +- libaio 0.3.90 test release + +* Mon Apr 29 2002 Benjamin LaHaise +- add requires initscripts >= 6.47-1 to get boot time libredhat-kernel + linkage correct. 
+- typo fix + +* Thu Apr 25 2002 Benjamin LaHaise +- make /usr/lib/libredhat-kernel.so point to /lib/libredhat-kernel.so.1.0.0 + +* Mon Apr 15 2002 Tim Powers +- make the post scriptlet not use /bin/sh + +* Sat Apr 12 2002 Benjamin LaHaise +- add /lib/libredhat-kernel* to %files. + +* Fri Apr 12 2002 Benjamin LaHaise +- make the dummy install as /lib/libredhat-kernel.so.1.0.0 so + that ldconfig will link against it if no other is installed. + +* Tue Jan 22 2002 Benjamin LaHaise +- add io_getevents + +* Tue Jan 22 2002 Michael K. Johnson +- Make linker happy with /usr/lib symlink for libredhat-kernel.so + +* Mon Jan 21 2002 Michael K. Johnson +- Added stub library + +* Sun Jan 20 2002 Michael K. Johnson +- Initial packaging diff --git a/man/io.3 b/man/io.3 new file mode 100644 index 0000000..d910a68 --- /dev/null +++ b/man/io.3 @@ -0,0 +1,351 @@ +.TH io 3 2002-09-12 "Linux 2.4" "Linux IO" +.SH NAME +io \- Asynchronous IO +.SH SYNOPSIS +.nf +.B #include <errno.h> +.sp +.br +.B #include <libaio.h> +.sp +.fi +.SH DESCRIPTION +The libaio library defines a new set of I/O operations which can +significantly reduce the time an application spends waiting at I/O. The +new functions allow a program to initiate one or more I/O operations and +then immediately resume normal work while the I/O operations are +executed in parallel. + +These functions are part of the library with realtime functions named +.IR "libaio" +. They are not actually part of the +.IR "libc" +binary. +The implementation of these functions can be done using support in the +kernel. + +All IO operations operate on files which were opened previously. There +might be arbitrarily many operations running for one file. The +asynchronous I/O operations are controlled using a data structure named +.IR "struct iocb" . +It is defined in +.IR "libaio.h" +as follows.
+ +.nf + +typedef struct io_context *io_context_t; + +typedef enum io_iocb_cmd { + IO_CMD_PREAD = 0, + IO_CMD_PWRITE = 1, + + IO_CMD_FSYNC = 2, + IO_CMD_FDSYNC = 3, + + IO_CMD_POLL = 5, + IO_CMD_NOOP = 6, +} io_iocb_cmd_t; + +struct io_iocb_common { + void *buf; + unsigned __pad1; + long nbytes; + unsigned __pad2; + long long offset; + long long __pad3, __pad4; +}; /* result code is the amount read or a negative errno */ + + +struct iocb { + void *data; + unsigned key; + short aio_lio_opcode; + short aio_reqprio; + int aio_fildes; + union { + struct io_iocb_common c; + struct io_iocb_vector v; + struct io_iocb_poll poll; + struct io_iocb_sockaddr saddr; + } u; +}; + + +.fi +.TP +.IR "int aio_fildes" +This element specifies the file descriptor to be used for the +operation. It must be a legal descriptor, otherwise the operation will +fail. + +The device on which the file is opened must allow the seek operation. +I.e., it is not possible to use any of the IO operations on devices +like terminals where an +.IR "lseek" +call would lead to an error. +.TP +.IR "long long u.c.offset" +This element specifies the offset in the file at which the operation (input +or output) is performed. Since the operations are carried out in arbitrary +order and more than one operation for one file descriptor can be +started, one cannot expect a current read/write position of the file +descriptor. +.TP +.IR "void *u.c.buf" +This is a pointer to the buffer with the data to be written or the place +where the read data is stored. +.TP +.IR "long u.c.nbytes" +This element specifies the length of the buffer pointed to by +.IR "u.c.buf" +. +.TP +.IR "short aio_reqprio" +Is not currently used. +.TP +.B "IO_CMD_PREAD" +Start a read operation. Read from the file at position +.IR "u.c.offset" +and store the next +.IR "u.c.nbytes" +bytes in the +buffer pointed to by +.IR "buf" +. +.TP +.B "IO_CMD_PWRITE" +Start a write operation.
Write +.IR "u.c.nbytes" +bytes starting at +.IR "buf" +into the file starting at position +.IR "u.c.offset" +. +.TP +.B "IO_CMD_NOOP" +Do nothing for this control block. This value is useful sometimes when +an array of +.IR "struct iocb" +values contains holes, i.e., some of the +values must not be handled although the whole array is presented to the +.IR "io_submit" +function. +.TP +.B "IO_CMD_FSYNC" +.TP +.B "IO_CMD_POLL" +This is experimental. +.SH EXAMPLE +.nf +/* + * Simplistic version of copy command using async i/o + * + * From: Stephen Hemminger + * Copy file by using an async I/O state machine. + * 1. Start read request + * 2. When read completes turn it into a write request + * 3. When write completes decrement counter and free resources + * + * + * Usage: aiocp file(s) destination + */ + +#include <unistd.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <fcntl.h> +#include <errno.h> + +#include <libaio.h> + +#define AIO_BLKSIZE (64*1024) +#define AIO_MAXIO 32 + +static int busy = 0; // # of I/O's in flight +static int tocopy = 0; // # of blocks left to copy +static int dstfd = -1; // destination file descriptor +static const char *dstname = NULL; +static const char *srcname = NULL; + + +/* Fatal error handler */ +static void io_error(const char *func, int rc) +{ + if (rc == -ENOSYS) + fprintf(stderr, "AIO not in this kernel\n"); + else if (rc < 0 && -rc < sys_nerr) + fprintf(stderr, "%s: %s\n", func, sys_errlist[-rc]); + else + fprintf(stderr, "%s: error %d\n", func, rc); + + if (dstfd > 0) + close(dstfd); + if (dstname) + unlink(dstname); + exit(1); +} + +/* + * Write complete callback.
+ * Adjust counts and free resources + */ +static void wr_done(io_context_t ctx, struct iocb *iocb, long res, long res2) +{ + if (res2 != 0) { + io_error("aio write", res2); + } + if (res != iocb->u.c.nbytes) { + fprintf(stderr, "write missed bytes expect %ld got %ld\n", iocb->u.c.nbytes, res); + exit(1); + } + --tocopy; + --busy; + free(iocb->u.c.buf); + + memset(iocb, 0xff, sizeof(*iocb)); // paranoia + free(iocb); + write(2, "w", 1); +} + +/* + * Read complete callback. + * Change read iocb into a write iocb and start it. + */ +static void rd_done(io_context_t ctx, struct iocb *iocb, long res, long res2) +{ + /* library needs accessors to look at iocb? */ + int iosize = iocb->u.c.nbytes; + char *buf = iocb->u.c.buf; + off_t offset = iocb->u.c.offset; + + if (res2 != 0) + io_error("aio read", res2); + if (res != iosize) { + fprintf(stderr, "read missing bytes expect %ld got %ld\n", iocb->u.c.nbytes, res); + exit(1); + } + + + /* turn read into write */ + io_prep_pwrite(iocb, dstfd, buf, iosize, offset); + io_set_callback(iocb, wr_done); + if (1 != (res = io_submit(ctx, 1, &iocb))) + io_error("io_submit write", res); + write(2, "r", 1); +} + + +int main(int argc, char *const *argv) +{ + int srcfd; + struct stat st; + off_t length = 0, offset = 0; + io_context_t myctx; + + if (argc != 3 || argv[1][0] == '-') { + fprintf(stderr, "Usage: aiocp SOURCE DEST\n"); + exit(1); + } + if ((srcfd = open(srcname = argv[1], O_RDONLY)) < 0) { + perror(srcname); + exit(1); + } + if (fstat(srcfd, &st) < 0) { + perror("fstat"); + exit(1); + } + length = st.st_size; + + if ((dstfd = open(dstname = argv[2], O_WRONLY | O_CREAT, 0666)) < 0) { + close(srcfd); + perror(dstname); + exit(1); + } + + /* initialize state machine */ + memset(&myctx, 0, sizeof(myctx)); + io_queue_init(AIO_MAXIO, &myctx); + tocopy = howmany(length, AIO_BLKSIZE); + + while (tocopy > 0) { + int i, rc; + /* Submit as many reads at once as possible, up to AIO_MAXIO */ + int n = MIN(MIN(AIO_MAXIO - busy, AIO_MAXIO / 2), +
howmany(length - offset, AIO_BLKSIZE)); + if (n > 0) { + struct iocb *ioq[n]; + + for (i = 0; i < n; i++) { + struct iocb *io = (struct iocb *) malloc(sizeof(struct iocb)); + int iosize = MIN(length - offset, AIO_BLKSIZE); + char *buf = (char *) malloc(iosize); + + if (NULL == buf || NULL == io) { + fprintf(stderr, "out of memory\n"); + exit(1); + } + + io_prep_pread(io, srcfd, buf, iosize, offset); + io_set_callback(io, rd_done); + ioq[i] = io; + offset += iosize; + } + + rc = io_submit(myctx, n, ioq); + if (rc < 0) + io_error("io_submit", rc); + + busy += n; + } + + // Handle IO's that have completed + rc = io_queue_run(myctx); + if (rc < 0) + io_error("io_queue_run", rc); + + // if we have maximum number of i/o's in flight + // then wait for one to complete + if (busy == AIO_MAXIO) { + rc = io_queue_wait(myctx, NULL); + if (rc < 0) + io_error("io_queue_wait", rc); + } + + } + + close(srcfd); + close(dstfd); + exit(0); +} + +/* + * Results look like: + * [alanm@toolbox ~/MOT3]$ ../taio kernel-source-2.4.8-0.4g.ppc.rpm abc + * rrrrrrrrrrrrrrrwwwrwrrwwrrwrwwrrwrwrwwrrwrwrrrrwwrwwwrrwrrrwwwwwwwwwwwwwwwww + * rrrrrrrrrrrrrrwwwrrwrwrwrwrrwwwwwwwwwwwwwwrrrrrrrrrrrrrrrrrrwwwwrwrwwrwrwrwr + * wrrrrrrrwwwwwwwwwwwwwrrrwrrrwrrwrwwwwwwwwwwrrrrwwrwrrrrrrrrrrrwwwwwwwwwwwrww + * wwwrrrrrrrrwwrrrwwrwrwrwwwrrrrrrrwwwrrwwwrrwrwwwwwwwwrrrrrrrwwwrrrrrrrwwwwww + * wwwwwwwrwrrrrrrrrwrrwrrwrrwrwrrrwrrrwrrrwrwwwwwwwwwwwwwwwwwwrrrwwwrrrrrrrrrr + * rrwrrrrrrwrrwwwwwwwwwwwwwwwwrwwwrrwrwwrrrrrrrrrrrrrrrrrrrwwwwwwwwwwwwwwwwwww + * rrrrrwrrwrwrwrrwrrrwwwwwwwwrrrrwrrrwrwwrwrrrwrrwrrrrwwwwwwwrwrwwwwrwwrrrwrrr + * rrrwwwwwwwrrrrwwrrrrrrrrrrrrwrwrrrrwwwwwwwwwwwwwwrwrrrrwwwwrwrrrrwrwwwrrrwww + * rwwrrrrrrrwrrrrrrrrrrrrwwwwrrrwwwrwrrwwwwwwwwwwwwwwwwwwwwwrrrrrrrwwwwwwwrw + */ +.fi +.SH "SEE ALSO" +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_fsync(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR 
io_queue_wait(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3) diff --git a/man/io_cancel.3 b/man/io_cancel.3 new file mode 100644 index 0000000..9a16084 --- /dev/null +++ b/man/io_cancel.3 @@ -0,0 +1,65 @@ +.TH io_cancel 2 2002-09-03 "Linux 2.4" "Linux AIO" +.SH NAME +io_cancel \- Cancel io requests +.SH SYNOPSIS +.nf +.B #include <errno.h> +.sp +.br +.B #include <libaio.h> +.sp +.br +.BI "int io_cancel(io_context_t ctx, struct iocb *iocb, struct io_event *evt)" +.br +.sp +struct iocb { + void *data; /* Returned in the io completion event */ + unsigned key; /* For use in identifying io requests */ + short aio_lio_opcode; + short aio_reqprio; /* Not used */ + int aio_fildes; +}; +.fi +.SH DESCRIPTION +Attempts to cancel an iocb previously passed to io_submit. If +the operation is successfully cancelled, the resulting event is +copied into the memory pointed to by evt without being placed +into the completion queue. +.PP +When one or more requests are asynchronously processed, it might be +useful in some situations to cancel a selected operation, e.g., if it +becomes obvious that the written data is no longer accurate and would +have to be overwritten soon. As an example, assume an application, which +writes data in files in a situation where new incoming data would have +to be written in a file which will be updated by an enqueued request. +.SH "RETURN VALUES" +0 is returned on success, otherwise returns errno. +.SH ERRORS +.TP +.B EFAULT +If any of the data structures pointed to are invalid. +.TP +.B EINVAL +If aio_context specified by ctx is +invalid. +.TP +.B EAGAIN +If the iocb specified was not +cancelled. +.TP +.B ENOSYS +if not implemented.
+.SH "SEE ALSO" +.BR io(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_fsync(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR io_queue_wait(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3) diff --git a/man/io_fsync.3 b/man/io_fsync.3 new file mode 100644 index 0000000..53eb63d --- /dev/null +++ b/man/io_fsync.3 @@ -0,0 +1,82 @@ +./" static inline int io_fsync(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd) +./" { +./" io_prep_fsync(iocb, fd); +./" io_set_callback(iocb, cb); +./" return io_submit(ctx, 1, &iocb); +./" } +.TH io_fsync 3 2002-09-12 "Linux 2.4" "Linux AIO" +.SH NAME +io_fsync \- Synchronize a file's complete in-core state with that on disk +.SH SYNOPSIS +.nf +.B #include <errno.h> +.sp +.br +.B #include <libaio.h> +.sp +.br +.BI "int io_fsync(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd)" +.sp +struct iocb { + void *data; + unsigned key; + short aio_lio_opcode; + short aio_reqprio; + int aio_fildes; +}; +.sp +typedef void (*io_callback_t)(io_context_t ctx, struct iocb *iocb, long res, long res2); +.sp +.fi +.SH DESCRIPTION +When dealing with asynchronous operations it is sometimes necessary to +get into a consistent state. This would mean for AIO that one wants to +know whether a certain request or a group of requests were processed. +This could be done by waiting for the notification sent by the system +after the operation terminated, but this sometimes would mean wasting +resources (mainly computation time). +.PP +Calling this function forces all I/O operations queued at the +time of the function call operating on the file descriptor +.IR "iocb->aio_fildes" +into the synchronized I/O completion state. The +.IR "io_fsync" +function returns +immediately but the notification through the method described in +.IR "io_callback" +will happen only after all requests for this +file descriptor have terminated and the file is synchronized.
This also +means that requests for this very same file descriptor which are queued +after the synchronization request are not affected. +.SH "RETURN VALUES" +Returns 0, otherwise returns errno. +.SH ERRORS +.TP +.B EFAULT +.I iocbs +referenced data outside of the program's accessible address space. +.TP +.B EINVAL +.I ctx +refers to an unitialized aio context, the iocb pointed to by +.I iocbs +contains an improperly initialized iocb, +.TP +.B EBADF +The iocb contains a file descriptor that does not exist. +.TP +.B EINVAL +The file specified in the iocb does not support the given io operation. +.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_getevents(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR io_queue_wait(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3) diff --git a/man/io_getevents.3 b/man/io_getevents.3 new file mode 100644 index 0000000..5062daa --- /dev/null +++ b/man/io_getevents.3 @@ -0,0 +1,134 @@ +./"/* io_getevents: +./" * Attempts to read at least min_nr events and up to nr events from +./" * the completion queue for the aio_context specified by ctx_id. May +./" * fail with -EINVAL if ctx_id is invalid, if min_nr is out of range, +./" * if nr is out of range, if when is out of range. May fail with +./" * -EFAULT if any of the memory specified to is invalid. May return +./" * 0 or < min_nr if no events are available and the timeout specified +./" * by when has elapsed, where when == NULL specifies an infinite +./" * timeout. Note that the timeout pointed to by when is relative and +./" * will be updated if not NULL and the operation blocks. Will fail +./" * with -ENOSYS if not implemented. 
+./" */ +./"asmlinkage long sys_io_getevents(io_context_t ctx_id, +./" long min_nr, +./" long nr, +./" struct io_event *events, +./" struct timespec *timeout) +./" +.TH io_getevents 2 2002-09-03 "Linux 2.4" "Linux AIO" +.SH NAME +io_getevents, io_pgetevents \- Read resulting events from io requests +.SH SYNOPSIS +.nf +.B #include <errno.h> +.sp +.br +.B #include <libaio.h> +.br +.sp +struct iocb { + void *data; + unsigned key; + short aio_lio_opcode; + short aio_reqprio; + int aio_fildes; +}; +.sp +struct io_event { + unsigned PADDED(data, __pad1); + unsigned PADDED(obj, __pad2); + unsigned PADDED(res, __pad3); + unsigned PADDED(res2, __pad4); +}; +.sp +.BI "int io_getevents(io_context_t " ctx ", long " min_nr ", long " nr ", struct io_event *" events ", struct timespec *" timeout ");" +.BI "int io_pgetevents(io_context_t " ctx ", long " min_nr ", long " nr ", struct io_event *" events ", struct timespec *" timeout ", sigset_t *" sigmask ");" +.fi +.SH DESCRIPTION +Attempts to read at least min_nr and up to nr events from +the completion queue for the aio_context specified by ctx. +.SH "RETURN VALUES" +May return +0 if no events are available and the timeout specified +by timeout has elapsed, where timeout == NULL specifies an infinite +timeout. Note that the timeout pointed to by timeout is relative and +will be updated if not NULL and the operation blocks. Will fail +with ENOSYS if not implemented. +.SS io_pgetevents() +The relationship between +.BR io_getevents () +and +.BR io_pgetevents () +is analogous to the relationship between +.BR select (2) +and +.BR pselect (2): +similar to +.BR pselect (2), +.BR io_pgetevents () +allows an application to safely wait until either an aio completion +event happens or until a signal is caught.
+.PP +The following +.BR io_pgetevents () +call: +.\" +.PP +.in +4n +.EX +ret = io_pgetevents(ctx, min_nr, nr, events, timeout, sigmask); +.EE +.in +.PP +is equivalent to +.I atomically +executing the following calls: +.PP +.in +4n +.EX +sigset_t origmask; + +pthread_sigmask(SIG_SETMASK, &sigmask, &origmask); +ret = io_getevents(ctx, min_nr, nr, events, timeout); +pthread_sigmask(SIG_SETMASK, &origmask, NULL); +.EE +.in +.PP +See the description of +.BR pselect (2) +for an explanation of why +.BR io_pgetevents () +is necessary. +.PP +If the +.I sigmask +argument is specified as NULL, then no signal mask manipulation is +performed (and thus +.BR io_pgetevents () +behaves the same as +.BR io_getevents () +). +.SH ERRORS +.TP +.B EINVAL +if ctx is invalid, if min_nr is out of range, +if nr is out of range, if timeout is out of range. +.TP +.B EFAULT +if any of the memory specified is invalid. +.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_prep_fsync(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR io_queue_wait(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3), +.BR pselect(2) diff --git a/man/io_prep_fsync.3 b/man/io_prep_fsync.3 new file mode 100644 index 0000000..4cf935a --- /dev/null +++ b/man/io_prep_fsync.3 @@ -0,0 +1,89 @@ +./" static inline void io_prep_fsync(struct iocb *iocb, int fd) +./" { +./" memset(iocb, 0, sizeof(*iocb)); +./" iocb->aio_fildes = fd; +./" iocb->aio_lio_opcode = IO_CMD_FSYNC; +./" iocb->aio_reqprio = 0; +./" } +.TH io_prep_fsync 3 2002-09-12 "Linux 2.4" "Linux AIO" +.SH NAME +io_prep_fsync \- Synchronize a file's complete in-core state with that on disk +.SH SYNOPSIS +.nf +.B #include <errno.h> +.br +.sp +.B #include <libaio.h> +.br +.sp +.BI "static inline void io_prep_fsync(struct iocb *iocb, int fd)" +.sp +struct iocb { + void *data; + unsigned key; + short aio_lio_opcode; + short aio_reqprio; + int aio_fildes; +}; +.sp +.fi +.SH
DESCRIPTION +This is an inline convenience function for setting up an iocb for an FSYNC request. +.br +The file for which +.TP +.IR "iocb->aio_fildes = fd" +is a descriptor is set up with +the command +.TP +.IR "iocb->aio_lio_opcode = IO_CMD_FSYNC" +. +.PP +The io_prep_fsync() function shall set up an IO_CMD_FSYNC operation +to asynchronously force all I/O +operations associated with the file indicated by the file +descriptor aio_fildes member of the iocb structure referenced by +the iocb argument and queued at the time of the call to +io_submit() to the synchronized I/O completion state. The function +call shall return when the synchronization request has been +initiated or queued to the file or device (even when the data +cannot be synchronized immediately). + +All currently queued I/O operations shall be completed as if by a call +to fsync(); that is, as defined for synchronized I/O file +integrity completion. If the +operation queued by io_prep_fsync() fails, then, as for fsync(), +outstanding I/O operations are not guaranteed to have +been completed. + +If io_prep_fsync() succeeds, then it is only the I/O that was queued +at the time of the call to io_submit() that is guaranteed to be +forced to the relevant completion state. The completion of +subsequent I/O on the file descriptor is not guaranteed to be +completed in a synchronized fashion. +.PP +This function returns immediately. To schedule the operation, the +function +.IR io_submit +must be called. +.PP +Simultaneous asynchronous operations using the same iocb produce +undefined results.
+.SH "RETURN VALUES" +None +.SH ERRORS +None +.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR io_queue_wait(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3) diff --git a/man/io_prep_pread.3 b/man/io_prep_pread.3 new file mode 100644 index 0000000..5938aec --- /dev/null +++ b/man/io_prep_pread.3 @@ -0,0 +1,79 @@ +./" static inline void io_prep_pread(struct iocb *iocb, int fd, void *buf, size_t count, long long offset) +./" { +./" memset(iocb, 0, sizeof(*iocb)); +./" iocb->aio_fildes = fd; +./" iocb->aio_lio_opcode = IO_CMD_PREAD; +./" iocb->aio_reqprio = 0; +./" iocb->u.c.buf = buf; +./" iocb->u.c.nbytes = count; +./" iocb->u.c.offset = offset; +./" } +.TH io_prep_pread 3 2002-09-12 "Linux 2.4" "Linux AIO" +.SH NAME +io_prep_pread \- Set up asynchronous read +.SH SYNOPSIS +.nf +.B #include <errno.h> +.sp +.br +.B #include <libaio.h> +.br +.sp +.BI "inline void io_prep_pread(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)" +.br +.sp +struct iocb { + void *data; + unsigned key; + short aio_lio_opcode; + short aio_reqprio; + int aio_fildes; +}; +.fi +.SH DESCRIPTION +.IR io_prep_pread +is an inline convenience function designed to facilitate the initialization of +the iocb for an asynchronous read operation. + +The first +.TP +.IR "iocb->u.c.nbytes = count" +bytes of the file for which +.TP +.IR "iocb->aio_fildes = fd" +is a descriptor are read into the buffer +starting at +.TP +.IR "iocb->u.c.buf = buf" +. +.br +Reading starts at the absolute position +.TP +.IR "iocb->u.c.offset = offset" +in the file. +.PP +This function returns immediately. To schedule the operation, the +function +.IR io_submit +must be called. +.PP +Simultaneous asynchronous operations using the same iocb produce +undefined results.
+.SH "RETURN VALUES" +None +.SH ERRORS +None +.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_fsync(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR io_queue_wait(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3) diff --git a/man/io_prep_pwrite.3 b/man/io_prep_pwrite.3 new file mode 100644 index 0000000..68b3500 --- /dev/null +++ b/man/io_prep_pwrite.3 @@ -0,0 +1,77 @@ +./" static inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf, size_t count, long long offset) +./" { +./" memset(iocb, 0, sizeof(*iocb)); +./" iocb->aio_fildes = fd; +./" iocb->aio_lio_opcode = IO_CMD_PWRITE; +./" iocb->aio_reqprio = 0; +./" iocb->u.c.buf = buf; +./" iocb->u.c.nbytes = count; +./" iocb->u.c.offset = offset; +./" } +.TH io_prep_pwrite 3 2002-09-12 "Linux 2.4" "Linux AIO" +.SH NAME +io_prep_pwrite \- Set up iocb for asynchronous writes +.SH SYNOPSIS +.nf +.B #include <errno.h> +.br +.sp +.B #include <libaio.h> +.br +.sp +.BI "inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)" +.br +.sp +struct iocb { + void *data; + unsigned key; + short aio_lio_opcode; + short aio_reqprio; + int aio_fildes; +}; +.fi +.SH DESCRIPTION +io_prep_pwrite is a convenience function for setting up parallel writes. + +The first +.TP +.IR "iocb->u.c.nbytes = count" +bytes of the file for which +.TP +.IR "iocb->aio_fildes = fd" +is a descriptor are written from the buffer +starting at +.TP +.IR "iocb->u.c.buf = buf" +. +.br +Writing starts at the absolute position +.TP +.IR "iocb->u.c.offset = offset" +in the file. +.PP +This function returns immediately. To schedule the operation, the +function +.IR io_submit +must be called. +.PP +Simultaneous asynchronous operations using the same iocb produce +undefined results.
+.SH "RETURN VALUES" +None +.SH ERRORS +None +.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_fsync(3), +.BR io_prep_pread(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR io_queue_wait(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3) diff --git a/man/io_queue_init.3 b/man/io_queue_init.3 new file mode 100644 index 0000000..317f631 --- /dev/null +++ b/man/io_queue_init.3 @@ -0,0 +1,63 @@ +.TH io_queue_init 2 2002-09-03 "Linux 2.4" "Linux AIO" +.SH NAME +io_queue_init \- Initialize asynchronous io state machine + +.SH SYNOPSIS +.nf +.B #include <errno.h> +.br +.sp +.B #include <libaio.h> +.br +.sp +.BI "int io_queue_init(int maxevents, io_context_t *ctx );" +.sp +.fi +.SH DESCRIPTION +.B io_queue_init +attempts to create an aio context capable of receiving at least +.IR maxevents +events. +.IR ctx +must point to an aio context that already exists and must be initialized +to +.IR 0 +before the call. +If the operation is successful, *ctx is filled with the resulting handle. +.SH "RETURN VALUES" +On success, +.B io_queue_init +returns 0. Otherwise, -error is returned, where +error is one of the Exxx values defined in the Errors section. +.SH ERRORS +.TP +.B EFAULT +.I iocbs +referenced data outside of the program's accessible address space. +.TP +.B EINVAL +.I maxevents +is <= 0 or +.IR ctx +is an invalid memory location. +.TP +.B ENOSYS +Not implemented +.TP +.B EAGAIN +.IR "maxevents > max_aio_reqs" +where max_aio_reqs is a tunable value.
+.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_fsync(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR io_queue_wait(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3) diff --git a/man/io_queue_release.3 b/man/io_queue_release.3 new file mode 100644 index 0000000..06b9ec0 --- /dev/null +++ b/man/io_queue_release.3 @@ -0,0 +1,48 @@ +.TH io_queue_release 2 2002-09-03 "Linux 2.4" "Linux AIO" +.SH NAME +io_queue_release \- Release the context associated with the userspace handle +.SH SYNOPSIS +.nf +.B #include <errno.h> +.br +.B #include <libaio.h> +.br +.sp +.BI "int io_queue_release(io_context_t ctx)" +.sp +.SH DESCRIPTION +.B io_queue_release +destroys the context associated with the userspace handle. May cancel any outstanding +AIOs and block on completion. + +.B cts. +.SH "RETURN VALUES" +On success, +.B io_queue_release +returns 0. Otherwise, -error is returned, where +error is one of the Exxx values defined in the Errors section.
+.SH ERRORS +.TP +.B EINVAL +.I ctx +refers to an uninitialized aio context, the iocb pointed to by +.I iocbs +contains an improperly initialized iocb, +.TP +.B ENOSYS +Not implemented +.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_fsync(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_run(3), +.BR io_queue_wait(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3) + diff --git a/man/io_queue_run.3 b/man/io_queue_run.3 new file mode 100644 index 0000000..57dd417 --- /dev/null +++ b/man/io_queue_run.3 @@ -0,0 +1,50 @@ +.TH io_queue_run 2 2002-09-03 "Linux 2.4" "Linux AIO" +.SH NAME +io_queue_run \- Handle completed io requests +.SH SYNOPSIS +.nf +.B #include <errno.h> +.br +.sp +.B #include <libaio.h> +.br +.sp +.BI "int io_queue_run(io_context_t ctx );" +.sp +.fi +.SH DESCRIPTION +.B io_queue_run +attempts to read all the events from +the completion queue for the aio_context specified by ctx. +.SH "RETURN VALUES" +May return +0 if no events are available. +Will fail with -ENOSYS if not implemented. +.SH ERRORS +.TP +.B EFAULT +.I iocbs +referenced data outside of the program's accessible address space.
+.TP +.B EINVAL +.I ctx +refers to an uninitialized aio context, the iocb pointed to by +.I iocbs +contains an improperly initialized iocb, +.TP +.B ENOSYS +Not implemented +.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_fsync(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_wait(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3) diff --git a/man/io_queue_wait.3 b/man/io_queue_wait.3 new file mode 100644 index 0000000..2306663 --- /dev/null +++ b/man/io_queue_wait.3 @@ -0,0 +1,56 @@ +.TH io_queue_wait 2 2002-09-03 "Linux 2.4" "Linux AIO" +.SH NAME +io_queue_wait \- Wait for io requests to complete +.SH SYNOPSIS +.nf +.B #include <errno.h> +.br +.sp +.B #include <libaio.h> +.br +.sp +.BI "int io_queue_wait(io_context_t ctx, const struct timespec *timeout);" +.fi +.SH DESCRIPTION +Attempts to read an event from +the completion queue for the aio_context specified by ctx. +.SH "RETURN VALUES" +May return +0 if no events are available and the timeout specified +by timeout has elapsed, where timeout == NULL specifies an infinite +timeout. Note that the timeout pointed to by timeout is relative and +will be updated if not NULL and the operation blocks. Will fail +with -ENOSYS if not implemented. +.PP +On success, +.B io_queue_wait +returns 0. Otherwise, -error is returned, where +error is one of the Exxx values defined in the Errors section. +.SH ERRORS +.TP +.B EFAULT +.I iocbs +referenced data outside of the program's accessible address space.
+.TP +.B EINVAL +.I ctx +refers to an uninitialized aio context, the iocb pointed to by +.I iocbs +contains an improperly initialized iocb, +.TP +.B ENOSYS +Not implemented +.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_fsync(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR io_set_callback(3), +.BR io_submit(3), +.BR errno(3) diff --git a/man/io_set_callback.3 b/man/io_set_callback.3 new file mode 100644 index 0000000..a8ca789 --- /dev/null +++ b/man/io_set_callback.3 @@ -0,0 +1,44 @@ +./"static inline void io_set_callback(struct iocb *iocb, io_callback_t cb) +.TH io_set_callback 3 2002-09-12 "Linux 2.4" "Linux AIO" +.SH NAME +io_set_callback \- Set up io completion callback function +.SH SYNOPSIS +.nf +.B #include <errno.h> +.br +.sp +.B #include <libaio.h> +.br +.sp +.BI "static inline void io_set_callback(struct iocb *iocb, io_callback_t cb)" +.sp +struct iocb { + void *data; + unsigned key; + short aio_lio_opcode; + short aio_reqprio; + int aio_fildes; +}; +.sp +typedef void (*io_callback_t)(io_context_t ctx, struct iocb *iocb, long res, long res2); +.sp +.fi +.SH DESCRIPTION +The callback is not done if the caller uses raw events from +io_getevents, only with the library helpers. +.SH "RETURN VALUES" +.SH ERRORS +.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_fsync(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR io_queue_wait(3), +.BR io_submit(3), +.BR errno(3) diff --git a/man/io_submit.3 b/man/io_submit.3 new file mode 100644 index 0000000..b6966ef --- /dev/null +++ b/man/io_submit.3 @@ -0,0 +1,135 @@ +./"/* sys_io_submit: +./" * Queue the nr iocbs pointed to by iocbpp for processing. Returns +./" * the number of iocbs queued.
May return -EINVAL if the aio_context +./" * specified by ctx_id is invalid, if nr is < 0, if the iocb at +./" * *iocbpp[0] is not properly initialized, if the operation specified +./" * is invalid for the file descriptor in the iocb. May fail with +./" * -EFAULT if any of the data structures point to invalid data. May +./" * fail with -EBADF if the file descriptor specified in the first +./" * iocb is invalid. May fail with -EAGAIN if insufficient resources +./" * are available to queue any iocbs. Will return 0 if nr is 0. Will +./" * fail with -ENOSYS if not implemented. +./" */ +.TH io_submit 2 2002-09-02 "Linux 2.4" "Linux AIO" +.SH NAME +io_submit \- Submit io requests +.SH SYNOPSIS +.nf +.B #include <errno.h> +.br +.sp +.B #include <libaio.h> +.br +.sp +.BI "int io_submit(io_context_t " ctx ", long " nr ", struct iocb *" iocbs "[]);" +.sp +struct iocb { + void *data; + unsigned key; + short aio_lio_opcode; + short aio_reqprio; + int aio_fildes; +}; +.fi +.SH DESCRIPTION +.B io_submit +submits +.I nr +iocbs for processing for a given io context ctx. + +The +.IR "io_submit" +function can be used to enqueue an arbitrary +number of read and write requests at one time. The requests can all be +meant for the same file, all for different files or every solution in +between. + +.IR "io_submit" +gets the +.IR "nr" +requests from the array pointed to +by +.IR "iocbs" . +The operation to be performed is determined by the +.IR "aio_lio_opcode" +member in each element of +.IR "iocbs" . +If this +field is +.B "IO_CMD_PREAD" +a read operation is enqueued, similar to a call +of +.IR "io_prep_pread" +for this element of the array (except that the way +the termination is signalled is different, as we will see below). If +the +.IR "aio_lio_opcode" +member is +.B "IO_CMD_PWRITE" +a write operation +is enqueued. Otherwise the +.IR "aio_lio_opcode" +must be +.B "IO_CMD_NOOP" +in which case this element of +.IR "iocbs" +is simply ignored.
This +``operation'' is useful in situations where one has a fixed array of +.IR "struct iocb" +elements from which only a few need to be handled at +a time. Another situation is where the +.IR "io_submit" +call was +canceled before all requests are processed and the remaining requests have to be reissued. + +The other members of each element of the array pointed to by +.IR "iocbs" +must have values suitable for the operation as described in +the documentation for +.IR "io_prep_pread" +and +.IR "io_prep_pwrite" +above. + +The function returns immediately after +having enqueued all the requests. +On success, +.B io_submit +returns the number of iocbs submitted successfully. Otherwise, -error is return, where +error is one of the Exxx values defined in the Errors section. +.PP +If an error is detected, then the behavior is undefined. +.PP +Simultaneous asynchronous operations using the same iocb produce +undefined results. +.SH ERRORS +.TP +.B EFAULT +.I iocbs +referenced data outside of the program's accessible address space. +.TP +.B EINVAL +.I ctx +refers to an unitialized aio context, the iocb pointed to by +.I iocbs +contains an improperly initialized iocb, +.TP +.B EBADF +The iocb contains a file descriptor that does not exist. +.TP +.B EINVAL +The file specified in the iocb does not support the given io operation. +.SH "SEE ALSO" +.BR io(3), +.BR io_cancel(3), +.BR io_fsync(3), +.BR io_getevents(3), +.BR io_prep_fsync(3), +.BR io_prep_pread(3), +.BR io_prep_pwrite(3), +.BR io_queue_init(3), +.BR io_queue_release(3), +.BR io_queue_run(3), +.BR io_queue_wait(3), +.BR io_set_callback(3), +.BR errno(3) diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..b437945 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,73 @@ +prefix=/usr +includedir=$(prefix)/include +libdir=$(prefix)/lib + +CFLAGS ?= -g -fomit-frame-pointer -O2 +CFLAGS += -nostdlib -nostartfiles -Wall -I. 
-fPIC +SO_CFLAGS=-shared $(CFLAGS) +L_CFLAGS=$(CFLAGS) +LINK_FLAGS= +LINK_FLAGS+=$(LDFLAGS) +ENABLE_SHARED ?= 1 + +soname=libaio.so.1 +minor=0 +micro=1 +libname=$(soname).$(minor).$(micro) +all_targets += libaio.a + +ifeq ($(ENABLE_SHARED),1) +all_targets += $(libname) +endif + +all: $(all_targets) + +# libaio provided functions +libaio_srcs := io_queue_init.c io_queue_release.c +libaio_srcs += io_queue_wait.c io_queue_run.c + +# real syscalls +libaio_srcs += io_getevents.c io_submit.c io_cancel.c +libaio_srcs += io_setup.c io_destroy.c io_pgetevents.c + +# internal functions +libaio_srcs += raw_syscall.c + +# old symbols +libaio_srcs += compat-0_1.c + +libaio_objs := $(patsubst %.c,%.ol,$(libaio_srcs)) +libaio_sobjs := $(patsubst %.c,%.os,$(libaio_srcs)) + +$(libaio_objs) $(libaio_sobjs): libaio.h vsys_def.h + +%.os: %.c + $(CC) $(SO_CFLAGS) -c -o $@ $< + +%.ol: %.c + $(CC) $(L_CFLAGS) -c -o $@ $< + +AR ?= ar +RANLIB ?= ranlib +libaio.a: $(libaio_objs) + rm -f libaio.a + $(AR) r libaio.a $^ + $(RANLIB) libaio.a + +$(libname): $(libaio_sobjs) libaio.map + $(CC) $(SO_CFLAGS) -Wl,--version-script=libaio.map -Wl,-soname=$(soname) -o $@ $(libaio_sobjs) $(LINK_FLAGS) + +install: $(all_targets) + install -D -m 644 libaio.h $(includedir)/libaio.h + install -D -m 644 libaio.a $(libdir)/libaio.a +ifeq ($(ENABLE_SHARED),1) + install -D -m 755 $(libname) $(libdir)/$(libname) + ln -sf $(libname) $(libdir)/$(soname) + ln -sf $(libname) $(libdir)/libaio.so +endif + +$(libaio_objs): libaio.h + +clean: + rm -f $(all_targets) $(libaio_objs) $(libaio_sobjs) $(soname).new + rm -f *.so* *.a *.o diff --git a/src/aio_ring.h b/src/aio_ring.h new file mode 100644 index 0000000..3842c4b --- /dev/null +++ b/src/aio_ring.h @@ -0,0 +1,49 @@ +/* + libaio Linux async I/O interface + Copyright 2002 Red Hat, Inc. 
+ + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _AIO_RING_H +#define _AIO_RING_H + +#define AIO_RING_MAGIC 0xa10a10a1 + +struct aio_ring { + unsigned id; /* kernel internal index number */ + unsigned nr; /* number of io_events */ + unsigned head; + unsigned tail; + + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; /* size of aio_ring */ +}; + +static inline int aio_ring_is_empty(io_context_t ctx, struct timespec *timeout) +{ + struct aio_ring *ring = (struct aio_ring *)ctx; + + if (!ring || ring->magic != AIO_RING_MAGIC) + return 0; + if (!timeout || timeout->tv_sec || timeout->tv_nsec) + return 0; + if (ring->head != ring->tail) + return 0; + return 1; +} + +#endif /* _AIO_RING_H */ diff --git a/src/compat-0_1.c b/src/compat-0_1.c new file mode 100644 index 0000000..136396f --- /dev/null +++ b/src/compat-0_1.c @@ -0,0 +1,62 @@ +/* libaio Linux async I/O interface + + compat-0_1.c : compatibility symbols for libaio 0.1.x-0.3.x + + Copyright 2002 Red Hat, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include + +#include "libaio.h" +#include "vsys_def.h" + +#include "syscall.h" + + +/* ABI change. Provide partial compatibility on this one for now. */ +SYMVER(compat0_1_io_cancel, io_cancel, 0.1); +int compat0_1_io_cancel(io_context_t ctx, struct iocb *iocb) +{ + struct io_event event; + + /* FIXME: the old ABI would return the event on the completion queue */ + return io_cancel(ctx, iocb, &event); +} + +SYMVER(compat0_1_io_queue_wait, io_queue_wait, 0.1); +int compat0_1_io_queue_wait(io_context_t ctx, struct timespec *when) +{ + struct timespec timeout; + if (when) + timeout = *when; + return io_getevents(ctx, 0, 0, NULL, when ? &timeout : NULL); +} + + +/* ABI change. Provide backwards compatibility for this one. */ +SYMVER(compat0_1_io_getevents, io_getevents, 0.1); +int compat0_1_io_getevents(io_context_t ctx_id, long nr, + struct io_event *events, + const struct timespec *const_timeout) +{ + struct timespec timeout; + if (const_timeout) + timeout = *const_timeout; + return io_getevents(ctx_id, 1, nr, events, + const_timeout ? &timeout : NULL); +} + diff --git a/src/io_cancel.c b/src/io_cancel.c new file mode 100644 index 0000000..2f0f5f4 --- /dev/null +++ b/src/io_cancel.c @@ -0,0 +1,23 @@ +/* io_cancel.c + libaio Linux async I/O interface + Copyright 2002 Red Hat, Inc. 
/*
 * io_syscall3 (syscall.h) expands to the definition of
 *   int io_cancel_0_4(io_context_t ctx, struct iocb *iocb, struct io_event *event)
 * Its body calls syscall(__NR_io_cancel, ...) with every argument cast to
 * long.  A negative result is turned into -errno and the caller's errno is
 * restored; otherwise the syscall's return value is passed through.
 */
io_syscall3(int, io_cancel_0_4, io_cancel, io_context_t, ctx, struct iocb *, iocb, struct io_event *, event)
/* Emits ".symver io_cancel_0_4,io_cancel@@LIBAIO_0.4" for the assembler. */
DEFSYMVER(io_cancel_0_4, io_cancel, 0.4)
/*
 * io_syscall1 (syscall.h) expands to the definition of
 *   int io_destroy(io_context_t ctx)
 * Its body calls syscall(__NR_io_destroy, (long)ctx).  A negative result is
 * turned into -errno and the caller's errno is restored; otherwise the
 * syscall's return value is passed through.
 */
io_syscall1(int, io_destroy, io_destroy, io_context_t, ctx)
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include "syscall.h" +#include "aio_ring.h" + +io_syscall5(int, __io_getevents_0_4, io_getevents, io_context_t, ctx, long, min_nr, long, nr, struct io_event *, events, struct timespec *, timeout) + +int io_getevents_0_4(io_context_t ctx, long min_nr, long nr, struct io_event * events, struct timespec * timeout) +{ + if (aio_ring_is_empty(ctx, timeout)) + return 0; + return __io_getevents_0_4(ctx, min_nr, nr, events, timeout); +} + +DEFSYMVER(io_getevents_0_4, io_getevents, 0.4) diff --git a/src/io_pgetevents.c b/src/io_pgetevents.c new file mode 100644 index 0000000..e6b0614 --- /dev/null +++ b/src/io_pgetevents.c @@ -0,0 +1,56 @@ +/* + libaio Linux async I/O interface + Copyright 2018 Christoph Hellwig. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include "syscall.h" +#include "aio_ring.h" + +#ifdef __NR_io_pgetevents +io_syscall6(int, __io_pgetevents, io_pgetevents, io_context_t, ctx, long, + min_nr, long, nr, struct io_event *, events, + struct timespec *, timeout, void *, sigmask); + +int io_pgetevents(io_context_t ctx, long min_nr, long nr, + struct io_event *events, struct timespec *timeout, + sigset_t *sigmask) +{ + struct { + unsigned long ss; + unsigned long ss_len; + } data; + + if (aio_ring_is_empty(ctx, timeout)) + return 0; + + data.ss = (unsigned long)sigmask; + data.ss_len = _NSIG / 8; + return __io_pgetevents(ctx, min_nr, nr, events, timeout, &data); +} +#else +int io_pgetevents(io_context_t ctx, long min_nr, long nr, + struct io_event *events, struct timespec *timeout, + sigset_t *sigmask) + +{ + return -ENOSYS; +} +#endif /* __NR_io_pgetevents */ diff --git a/src/io_queue_init.c b/src/io_queue_init.c new file mode 100644 index 0000000..563d137 --- /dev/null +++ b/src/io_queue_init.c @@ -0,0 +1,33 @@ +/* io_queue_init.c + libaio Linux async I/O interface + Copyright 2002 Red Hat, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include + +#include "syscall.h" + +int io_queue_init(int maxevents, io_context_t *ctxp) +{ + if (maxevents > 0) { + *ctxp = NULL; + return io_setup(maxevents, ctxp); + } + return -EINVAL; +} diff --git a/src/io_queue_release.c b/src/io_queue_release.c new file mode 100644 index 0000000..94bbb86 --- /dev/null +++ b/src/io_queue_release.c @@ -0,0 +1,27 @@ +/* io_queue_release.c + libaio Linux async I/O interface + Copyright 2002 Red Hat, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include + +int io_queue_release(io_context_t ctx) +{ + return io_destroy(ctx); +} diff --git a/src/io_queue_run.c b/src/io_queue_run.c new file mode 100644 index 0000000..e0132f4 --- /dev/null +++ b/src/io_queue_run.c @@ -0,0 +1,39 @@ +/* io_submit + libaio Linux async I/O interface + Copyright 2002 Red Hat, Inc. 
+ + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include + +int io_queue_run(io_context_t ctx) +{ + static struct timespec timeout = { 0, 0 }; + struct io_event event; + int ret; + + /* FIXME: batch requests? */ + while (1 == (ret = io_getevents(ctx, 0, 1, &event, &timeout))) { + io_callback_t cb = (io_callback_t)event.data; + struct iocb *iocb = event.obj; + + cb(ctx, iocb, event.res, event.res2); + } + + return ret; +} diff --git a/src/io_queue_wait.c b/src/io_queue_wait.c new file mode 100644 index 0000000..538d2f3 --- /dev/null +++ b/src/io_queue_wait.c @@ -0,0 +1,31 @@ +/* io_submit + libaio Linux async I/O interface + Copyright 2002 Red Hat, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
struct timespec;

/*
 * io_queue_wait as exported under LIBAIO_0.4.
 * Calls io_getevents with min_nr = 0, nr = 0 and no event buffer, passing
 * the caller's timeout through, and returns its result.
 */
int io_queue_wait_0_4(io_context_t ctx, struct timespec *timeout)
{
	return io_getevents(ctx, 0, 0, NULL, timeout);
}
/* Emits ".symver io_queue_wait_0_4,io_queue_wait@@LIBAIO_0.4". */
DEFSYMVER(io_queue_wait_0_4, io_queue_wait, 0.4)

/*
 * io_setup.c
 *
 * io_syscall2 (syscall.h) expands to the definition of
 *   int io_setup(int maxevents, io_context_t *ctxp)
 * Its body calls syscall(__NR_io_setup, ...) and returns -errno on failure
 * with the caller's errno restored.
 */
io_syscall2(int, io_setup, io_setup, int, maxevents, io_context_t *, ctxp)
/*
 * io_syscall3 (syscall.h) expands to the definition of
 *   int io_submit(io_context_t ctx, long nr, struct iocb **iocbs)
 * Its body calls syscall(__NR_io_submit, ...) with every argument cast to
 * long.  A negative result is turned into -errno and the caller's errno is
 * restored; otherwise the syscall's return value is passed through.
 */
io_syscall3(int, io_submit, io_submit, io_context_t, ctx, long, nr, struct iocb **, iocbs)
struct timespec;
struct sockaddr;
struct iovec;

/* Opaque handle; src/aio_ring.h casts it to struct aio_ring * to inspect it. */
typedef struct io_context *io_context_t;

/* Values stored in iocb.aio_lio_opcode by the io_prep_* helpers. */
typedef enum io_iocb_cmd {
	IO_CMD_PREAD = 0,
	IO_CMD_PWRITE = 1,

	IO_CMD_FSYNC = 2,
	IO_CMD_FDSYNC = 3,

	IO_CMD_POLL = 5,
	IO_CMD_NOOP = 6,
	IO_CMD_PREADV = 7,
	IO_CMD_PWRITEV = 8,
} io_iocb_cmd_t;

/*
 * Field-declaration helpers, selected by byte order and word size.
 *   PADDED(x, y)     declares member x and an unsigned companion y
 *   PADDEDptr(x, y)  declares pointer member x, plus pad y on 32-bit targets
 *   PADDEDul(x, y)   declares unsigned long x, plus pad y on 32-bit targets
 * On little-endian targets the companion/pad follows x; on big-endian
 * targets it precedes x.  On 64-bit targets PADDEDptr/PADDEDul add no pad.
 * NOTE(review): presumably this keeps every field in a 64-bit slot that
 * matches the kernel's iocb/io_event layout -- confirm against the kernel
 * uapi header.
 */

/* little endian, 32 bits */
#if defined(__i386__) || (defined(__arm__) && !defined(__ARMEB__)) || \
    defined(__sh__) || defined(__bfin__) || defined(__MIPSEL__) || \
    defined(__cris__) || (defined(__riscv) && __riscv_xlen == 32) || \
    (defined(__GNUC__) && defined(__BYTE_ORDER__) && \
         __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG__ == 4)
#define PADDED(x, y)	x; unsigned y
#define PADDEDptr(x, y)	x; unsigned y
#define PADDEDul(x, y)	unsigned long x; unsigned y

/* little endian, 64 bits */
#elif defined(__ia64__) || defined(__x86_64__) || defined(__alpha__) || \
      (defined(__aarch64__) && defined(__AARCH64EL__)) || \
      (defined(__riscv) && __riscv_xlen == 64) || \
      (defined(__GNUC__) && defined(__BYTE_ORDER__) && \
          __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG__ == 8)
#define PADDED(x, y)	x, y
#define PADDEDptr(x, y)	x
#define PADDEDul(x, y)	unsigned long x

/* big endian, 64 bits */
#elif defined(__powerpc64__) || defined(__s390x__) || \
      (defined(__sparc__) && defined(__arch64__)) || \
      (defined(__aarch64__) && defined(__AARCH64EB__)) || \
      (defined(__GNUC__) && defined(__BYTE_ORDER__) && \
           __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_LONG__ == 8)
#define PADDED(x, y)	unsigned y; x
#define PADDEDptr(x,y)	x
#define PADDEDul(x, y)	unsigned long x

/* big endian, 32 bits */
#elif defined(__PPC__) || defined(__s390__) || \
      (defined(__arm__) && defined(__ARMEB__)) || \
      defined(__sparc__) || defined(__MIPSEB__) || defined(__m68k__) || \
      defined(__hppa__) || defined(__frv__) || defined(__avr32__) || \
      (defined(__GNUC__) && defined(__BYTE_ORDER__) && \
           __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_LONG__ == 4)
#define PADDED(x, y)	unsigned y; x
#define PADDEDptr(x, y)	unsigned y; x
#define PADDEDul(x, y)	unsigned y; unsigned long x

#else
#error	endian?
#endif

struct io_iocb_poll {
	PADDED(int events, __pad1);
};	/* result code is the set of result flags or -'ve errno */

struct io_iocb_sockaddr {
	struct sockaddr *addr;
	int		len;
};	/* result code is the length of the sockaddr, or -'ve errno */

/* Payload used by the read/write/readv/writev helpers (iocb.u.c). */
struct io_iocb_common {
	PADDEDptr(void	*buf, __pad1);
	PADDEDul(nbytes, __pad2);
	long long	offset;
	long long	__pad3;
	unsigned	flags;	/* io_set_eventfd() sets bit 0 here */
	unsigned	resfd;	/* io_set_eventfd() stores the eventfd here */
};	/* result code is the amount read or -'ve errno */

struct io_iocb_vector {
	const struct iovec	*vec;
	int			nr;
	long long		offset;
};	/* result code is the amount read or -'ve errno */

/* One I/O request, filled in by io_prep_* and passed to io_submit. */
struct iocb {
	PADDEDptr(void *data, __pad1);	/* Return in the io completion event */
	/* key: For use in identifying io requests */
	/* aio_rw_flags: RWF_* flags (such as RWF_NOWAIT) */
	/* PADDED makes aio_rw_flags a real member; io_prep_p{read,write}v2 set it */
	PADDED(unsigned key, aio_rw_flags);

	short		aio_lio_opcode;	/* one of enum io_iocb_cmd */
	short		aio_reqprio;
	int		aio_fildes;

	union {
		struct io_iocb_common		c;
		struct io_iocb_vector		v;
		struct io_iocb_poll		poll;
		struct io_iocb_sockaddr	saddr;
	} u;
};

/* One completion record as filled in by io_getevents / io_cancel. */
struct io_event {
	PADDEDptr(void *data, __pad1);	/* io_queue_run treats this as an io_callback_t */
	PADDEDptr(struct iocb *obj,  __pad2);
	PADDEDul(res,  __pad3);
	PADDEDul(res2, __pad4);
};

/* The layout helpers are private to this header. */
#undef PADDED
#undef PADDEDptr
#undef PADDEDul

/* Completion callback signature invoked by io_queue_run. */
typedef void (*io_callback_t)(io_context_t ctx, struct iocb *iocb, long res, long res2);
/*
 * Store the completion callback in iocb->data.
 * Every io_prep_* helper below zeroes the whole iocb, so this must be
 * called after the prep call (as io_poll/io_fsync/io_fdsync do).
 */
static inline void io_set_callback(struct iocb *iocb, io_callback_t cb)
{
	iocb->data = (void *)cb;
}

/* Zero *iocb and fill it as an IO_CMD_PREAD of `count` bytes at `offset` into buf. */
static inline void io_prep_pread(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
{
	memset(iocb, 0, sizeof(*iocb));
	iocb->aio_fildes = fd;
	iocb->aio_lio_opcode = IO_CMD_PREAD;
	iocb->aio_reqprio = 0;
	iocb->u.c.buf = buf;
	iocb->u.c.nbytes = count;
	iocb->u.c.offset = offset;
}

/* Zero *iocb and fill it as an IO_CMD_PWRITE of `count` bytes at `offset` from buf. */
static inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
{
	memset(iocb, 0, sizeof(*iocb));
	iocb->aio_fildes = fd;
	iocb->aio_lio_opcode = IO_CMD_PWRITE;
	iocb->aio_reqprio = 0;
	iocb->u.c.buf = buf;
	iocb->u.c.nbytes = count;
	iocb->u.c.offset = offset;
}

/*
 * Zero *iocb and fill it as an IO_CMD_PREADV.
 * The iovec array pointer goes in u.c.buf (const is cast away) and the
 * number of iovec entries goes in u.c.nbytes.
 */
static inline void io_prep_preadv(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset)
{
	memset(iocb, 0, sizeof(*iocb));
	iocb->aio_fildes = fd;
	iocb->aio_lio_opcode = IO_CMD_PREADV;
	iocb->aio_reqprio = 0;
	iocb->u.c.buf = (void *)iov;
	iocb->u.c.nbytes = iovcnt;
	iocb->u.c.offset = offset;
}

/* Same as io_prep_preadv but with opcode IO_CMD_PWRITEV. */
static inline void io_prep_pwritev(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset)
{
	memset(iocb, 0, sizeof(*iocb));
	iocb->aio_fildes = fd;
	iocb->aio_lio_opcode = IO_CMD_PWRITEV;
	iocb->aio_reqprio = 0;
	iocb->u.c.buf = (void *)iov;
	iocb->u.c.nbytes = iovcnt;
	iocb->u.c.offset = offset;
}

/* io_prep_preadv plus `flags` stored in iocb->aio_rw_flags. */
static inline void io_prep_preadv2(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset, int flags)
{
	memset(iocb, 0, sizeof(*iocb));
	iocb->aio_fildes = fd;
	iocb->aio_lio_opcode = IO_CMD_PREADV;
	iocb->aio_reqprio = 0;
	iocb->aio_rw_flags = flags;
	iocb->u.c.buf = (void *)iov;
	iocb->u.c.nbytes = iovcnt;
	iocb->u.c.offset = offset;
}

/* io_prep_pwritev plus `flags` stored in iocb->aio_rw_flags. */
static inline void io_prep_pwritev2(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset, int flags)
{
	memset(iocb, 0, sizeof(*iocb));
	iocb->aio_fildes = fd;
	iocb->aio_lio_opcode = IO_CMD_PWRITEV;
	iocb->aio_reqprio = 0;
	iocb->aio_rw_flags = flags;
	iocb->u.c.buf = (void *)iov;
	iocb->u.c.nbytes = iovcnt;
	iocb->u.c.offset = offset;
}

/* Zero *iocb and fill it as an IO_CMD_POLL on fd for the given event mask. */
static inline void io_prep_poll(struct iocb *iocb, int fd, int events)
{
	memset(iocb, 0, sizeof(*iocb));
	iocb->aio_fildes = fd;
	iocb->aio_lio_opcode = IO_CMD_POLL;
	iocb->aio_reqprio = 0;
	iocb->u.poll.events = events;
}

/* Prepare a poll request, attach cb, and submit it; returns io_submit's result. */
static inline int io_poll(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd, int events)
{
	io_prep_poll(iocb, fd, events);
	io_set_callback(iocb, cb);
	return io_submit(ctx, 1, &iocb);
}

/* Zero *iocb and fill it as an IO_CMD_FSYNC on fd. */
static inline void io_prep_fsync(struct iocb *iocb, int fd)
{
	memset(iocb, 0, sizeof(*iocb));
	iocb->aio_fildes = fd;
	iocb->aio_lio_opcode = IO_CMD_FSYNC;
	iocb->aio_reqprio = 0;
}

/* Prepare an fsync request, attach cb, and submit it; returns io_submit's result. */
static inline int io_fsync(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd)
{
	io_prep_fsync(iocb, fd);
	io_set_callback(iocb, cb);
	return io_submit(ctx, 1, &iocb);
}

/* Zero *iocb and fill it as an IO_CMD_FDSYNC on fd. */
static inline void io_prep_fdsync(struct iocb *iocb, int fd)
{
	memset(iocb, 0, sizeof(*iocb));
	iocb->aio_fildes = fd;
	iocb->aio_lio_opcode = IO_CMD_FDSYNC;
	iocb->aio_reqprio = 0;
}

/* Prepare an fdsync request, attach cb, and submit it; returns io_submit's result. */
static inline int io_fdsync(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd)
{
	io_prep_fdsync(iocb, fd);
	io_set_callback(iocb, cb);
	return io_submit(ctx, 1, &iocb);
}

/*
 * Set bit 0 of u.c.flags and record `eventfd` in u.c.resfd.
 * Call after io_prep_*: the prep helpers zero both fields.
 */
static inline void io_set_eventfd(struct iocb *iocb, int eventfd)
{
	iocb->u.c.flags |= (1 << 0) /* IOCB_FLAG_RESFD */;
	iocb->u.c.resfd = eventfd;
}
- __ia64_aio_raw_syscall\n" + ".endp __ia64_aio_raw_syscall" +); +#endif + +; diff --git a/src/syscall-alpha.h b/src/syscall-alpha.h new file mode 100644 index 0000000..0aa4d3d --- /dev/null +++ b/src/syscall-alpha.h @@ -0,0 +1,5 @@ +#define __NR_io_setup 398 +#define __NR_io_destroy 399 +#define __NR_io_getevents 400 +#define __NR_io_submit 401 +#define __NR_io_cancel 402 diff --git a/src/syscall-arm.h b/src/syscall-arm.h new file mode 100644 index 0000000..556852b --- /dev/null +++ b/src/syscall-arm.h @@ -0,0 +1,26 @@ +/* + * linux/include/asm-arm/unistd.h + * + * Copyright (C) 2001-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Please forward _all_ changes to this file to rmk@arm.linux.org.uk, + * no matter what the change is. Thanks! + */ + +#define __NR_OABI_SYSCALL_BASE 0x900000 + +#if defined(__thumb__) || defined(__ARM_EABI__) +#define __NR_SYSCALL_BASE 0 +#else +#define __NR_SYSCALL_BASE __NR_OABI_SYSCALL_BASE +#endif + +#define __NR_io_setup (__NR_SYSCALL_BASE+243) +#define __NR_io_destroy (__NR_SYSCALL_BASE+244) +#define __NR_io_getevents (__NR_SYSCALL_BASE+245) +#define __NR_io_submit (__NR_SYSCALL_BASE+246) +#define __NR_io_cancel (__NR_SYSCALL_BASE+247) diff --git a/src/syscall-generic.h b/src/syscall-generic.h new file mode 100644 index 0000000..b217b53 --- /dev/null +++ b/src/syscall-generic.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This is based on the include/uapi/asm-generic/unistd.h header file + * in the kernel, which is a generic syscall schema for new architectures. 
+ */ + +#define __NR_io_setup 0 +#define __NR_io_destroy 1 +#define __NR_io_submit 2 +#define __NR_io_cancel 3 +#define __NR_io_getevents 4 diff --git a/src/syscall-i386.h b/src/syscall-i386.h new file mode 100644 index 0000000..bc66bb1 --- /dev/null +++ b/src/syscall-i386.h @@ -0,0 +1,6 @@ +#define __NR_io_setup 245 +#define __NR_io_destroy 246 +#define __NR_io_getevents 247 +#define __NR_io_submit 248 +#define __NR_io_cancel 249 +#define __NR_io_pgetevents 385 diff --git a/src/syscall-ia64.h b/src/syscall-ia64.h new file mode 100644 index 0000000..a21e93b --- /dev/null +++ b/src/syscall-ia64.h @@ -0,0 +1,5 @@ +#define __NR_io_setup 1238 +#define __NR_io_destroy 1239 +#define __NR_io_getevents 1240 +#define __NR_io_submit 1241 +#define __NR_io_cancel 1242 diff --git a/src/syscall-ppc.h b/src/syscall-ppc.h new file mode 100644 index 0000000..dcfb118 --- /dev/null +++ b/src/syscall-ppc.h @@ -0,0 +1,5 @@ +#define __NR_io_setup 227 +#define __NR_io_destroy 228 +#define __NR_io_getevents 229 +#define __NR_io_submit 230 +#define __NR_io_cancel 231 diff --git a/src/syscall-s390.h b/src/syscall-s390.h new file mode 100644 index 0000000..f0805f5 --- /dev/null +++ b/src/syscall-s390.h @@ -0,0 +1,5 @@ +#define __NR_io_setup 243 +#define __NR_io_destroy 244 +#define __NR_io_getevents 245 +#define __NR_io_submit 246 +#define __NR_io_cancel 247 diff --git a/src/syscall-sparc.h b/src/syscall-sparc.h new file mode 100644 index 0000000..3e63e92 --- /dev/null +++ b/src/syscall-sparc.h @@ -0,0 +1,5 @@ +#define __NR_io_setup 268 +#define __NR_io_destroy 269 +#define __NR_io_submit 270 +#define __NR_io_cancel 271 +#define __NR_io_getevents 272 diff --git a/src/syscall-x86_64.h b/src/syscall-x86_64.h new file mode 100644 index 0000000..0eccef3 --- /dev/null +++ b/src/syscall-x86_64.h @@ -0,0 +1,6 @@ +#define __NR_io_setup 206 +#define __NR_io_destroy 207 +#define __NR_io_getevents 208 +#define __NR_io_submit 209 +#define __NR_io_cancel 210 +#define __NR_io_pgetevents 333 diff --git 
/* Two-level stringification so macro arguments are expanded before #. */
#define _SYMSTR(str)	#str
#define SYMSTR(str)	_SYMSTR(str)

/*
 * Emit ".symver compat_sym,orig_sym@LIBAIO_<ver_sym>" (single '@').
 * The expansion already ends in ';'.
 */
#define SYMVER(compat_sym, orig_sym, ver_sym)	\
	__asm__(".symver " SYMSTR(compat_sym) "," SYMSTR(orig_sym) "@LIBAIO_" SYMSTR(ver_sym));

/* Same as SYMVER but with "@@", i.e. ".symver compat_sym,orig_sym@@LIBAIO_<ver_sym>". */
#define DEFSYMVER(compat_sym, orig_sym, ver_sym)	\
	__asm__(".symver " SYMSTR(compat_sym) "," SYMSTR(orig_sym) "@@LIBAIO_" SYMSTR(ver_sym));

/*
 * Pull in the per-architecture __NR_io_* numbers shipped with the library.
 * Architectures not listed fall through to the #warning.
 */
#if defined(__i386__)
#include "syscall-i386.h"
#elif defined(__x86_64__)
#include "syscall-x86_64.h"
#elif defined(__ia64__)
#include "syscall-ia64.h"
#elif defined(__PPC__)
#include "syscall-ppc.h"
#elif defined(__s390__)
#include "syscall-s390.h"
#elif defined(__alpha__)
#include "syscall-alpha.h"
#elif defined(__arm__)
#include "syscall-arm.h"
#elif defined(__sparc__)
#include "syscall-sparc.h"
#elif defined(__aarch64__) || defined(__riscv)
#include "syscall-generic.h"
#else
#warning "using system call numbers from sys/syscall.h"
#endif

/*
 * Function body shared by every io_syscallN wrapper.
 * Calls syscall(__NR_<sname>, args...).  On a negative return it converts
 * the result to -errno and puts back the errno value that was current on
 * entry, so the wrappers report errors through their return value and
 * leave errno as the caller had it.  The result is narrowed to int.
 */
#define _body_io_syscall(sname, args...)	\
{						\
	int ret, saved_errno;			\
	saved_errno = errno;			\
	ret= syscall(__NR_##sname, ## args);	\
	if (ret < 0) {				\
		ret = -errno;			\
		errno = saved_errno;		\
	}					\
	return ret;				\
}

/*
 * io_syscallN(type, fname, sname, type1, arg1, ...) defines
 *   type fname(type1 arg1, ...)
 * whose body is _body_io_syscall(sname, ...) with each argument cast to long.
 */
#define io_syscall1(type,fname,sname,type1,arg1)			\
type fname(type1 arg1)							\
_body_io_syscall(sname, (long)arg1)

#define io_syscall2(type,fname,sname,type1,arg1,type2,arg2)		\
type fname(type1 arg1,type2 arg2)					\
_body_io_syscall(sname, (long)arg1, (long)arg2)

#define io_syscall3(type,fname,sname,type1,arg1,type2,arg2,type3,arg3)	\
type fname(type1 arg1,type2 arg2,type3 arg3)				\
_body_io_syscall(sname, (long)arg1, (long)arg2, (long)arg3)

#define io_syscall4(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
type fname (type1 arg1, type2 arg2, type3 arg3, type4 arg4)		\
_body_io_syscall(sname, (long)arg1, (long)arg2, (long)arg3, (long)arg4)

#define io_syscall5(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,type4,arg4, type5,arg5)	\
type fname (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5)	\
_body_io_syscall(sname, (long)arg1, (long)arg2, (long)arg3, (long)arg4, (long)arg5)

#define io_syscall6(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,	\
		type4,arg4,type5,arg5,type6,arg6)			\
type fname (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,	\
		type6 arg6)						\
_body_io_syscall(sname, (long)arg1, (long)arg2, (long)arg3, (long)arg4,	\
		(long)arg5, (long)arg6)
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +extern int vsys_io_setup(unsigned nr_reqs, io_context_t *ctxp); +extern int vsys_io_destroy(io_context_t ctx); +extern int vsys_io_submit(io_context_t ctx, long nr, struct iocb *iocbs[]); +extern int vsys_io_cancel(io_context_t ctx, struct iocb *iocb); +extern int vsys_io_wait(io_context_t ctx, struct iocb *iocb, const struct timespec *when); +extern int vsys_io_getevents(io_context_t ctx_id, long nr, struct io_event *events, const struct timespec *timeout); + -- 2.30.2