zoukankan      html  css  js  c++  java
  • [转载]hdfs c/c++ API

    原文链接:http://blog.csdn.net/sprintfwater/article/details/8996214

    1.建立、关闭与HDFS连接:hdfsConnect()、hdfsConnectAsUser()、hdfsDisconnect()。hdfsConnect()实际上是直接调用hdfsConnectAsUser。

    2.打开、关闭HDFS文件:hdfsOpenFile()、hdfsCloseFile()。当用hdfsOpenFile()创建文件时,可以指定replication和blocksize参数。写打开一个文件时,隐含O_TRUNC标志,文件会被截断,写入是从文件头开始的。

    3.读HDFS文件:hdfsRead()、hdfsPread()。两个函数都有可能返回少于用户要求的字节数,此时可以再次调用这两个函数读入剩下的部分(类似APUE中的readn实现);只有在两个函数返回零时,我们才能断定到了文件末尾。

    4.写HDFS文件:hdfsWrite()。HDFS不支持随机写,只能是从文件头顺序写入。

    5.查询HDFS文件信息:hdfsGetPathInfo()

    6.查询和设置HDFS文件读写偏移量:hdfsSeek()、hdfsTell()

    7.查询数据块所在节点信息:hdfsGetHosts()。返回一个或多个数据块所在数据节点的信息,一个数据块可能存在多个数据节点上。

    8.libhdfs中的函数是通过jni调用JAVA虚拟机,在虚拟机中构造对应的HDFS的JAVA类,然后反射调用该类的功能函数。总会发生JVM和程序之间内存拷贝的动作,性能方面值得注意。

    9.HDFS不支持多个客户端同时写入的操作,无文件或是记录锁的概念。

    10.建议只有超大文件才应该考虑放在HDFS上,而且最好对文件的访问是写一次,读多次。小文件不应该考虑放在HDFS上,得不偿失!

      1 /**
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements.  See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership.  The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the
      7  * "License"); you may not use this file except in compliance
      8  * with the License.  You may obtain a copy of the License at
      9  *
     10  *     http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  */
     18 
     19 #ifndef LIBHDFS_HDFS_H
     20 #define LIBHDFS_HDFS_H
     21 
     22 #include <sys/types.h>
     23 #include <sys/stat.h>
     24 
     25 #include <fcntl.h>
     26 #include <stdio.h>
     27 #include <stdint.h>
     28 #include <string.h>
     29 #include <stdlib.h>
     30 #include <time.h>
     31 #include <errno.h>
     32 
     33 #include <jni.h>
     34 
     35 #ifndef O_RDONLY
     36 #define O_RDONLY 1
     37 #endif
     38 
     39 #ifndef O_WRONLY 
     40 #define O_WRONLY 2
     41 #endif
     42 
     43 #ifndef EINTERNAL
     44 #define EINTERNAL 255 
     45 #endif
     46 
     47 
     48 /** All APIs set errno to meaningful values */
     49 #ifdef __cplusplus
     50 extern  "C" {
     51 #endif
     52 
     53     /**
     54      * Some utility decls used in libhdfs.
     55      */
     56 
     57     typedef int32_t   tSize; /// size of data for read/write io ops 
     58     typedef time_t    tTime; /// time type
     59     typedef int64_t   tOffset;/// offset within the file
     60     typedef uint16_t  tPort; /// port
     61     typedef enum tObjectKind { /* kind of filesystem object; used by hdfsFileInfo.mKind */
     62         kObjectKindFile = 'F',      /* the object is a file (encoded as the character 'F') */
     63         kObjectKindDirectory = 'D', /* the object is a directory (encoded as the character 'D') */
     64     } tObjectKind;
     65 
     66 
     67     /**
     68      * The C reflection of org.apache.hadoop.fs.FileSystem .
     69      */
     70     typedef void* hdfsFS;
     71 
     72     
     73     /**
     74      * The C equivalent of org.apache.hadoop.fs.FSData(Input|Output)Stream .
     75      */
     76     enum hdfsStreamType /* direction of the stream held by an hdfsFile handle */
     77     {
     78         UNINITIALIZED = 0, /* no direction assigned; NOTE(review): presumably the state before a successful open - confirm */
     79         INPUT = 1,         /* input stream (file is read from) */
     80         OUTPUT = 2,        /* output stream (file is written to) */
     81     };
     82 
     83     
     84     /**
     85      * The 'file-handle' to a file in hdfs.
     86      */
     87     struct hdfsFile_internal {
     88         void* file;               /* opaque stream object; NOTE(review): presumably a JNI reference to the Java stream - confirm in the implementation */
     89         enum hdfsStreamType type; /* whether 'file' is an input or an output stream */
     90     };
     91     typedef struct hdfsFile_internal* hdfsFile; /* file handle type taken by the hdfs* file functions */
     92       
     93 
     94     /** 
     95      * hdfsConnect - Connect to a hdfs file system.
     96      * Connect to the hdfs.
     97      * @param host A string containing either a host name, or an ip address
     98      * of the namenode of a hdfs cluster. 'host' should be passed as NULL if
     99      * you want to connect to local filesystem. 'host' should be passed as
    100      * 'default' (and port as 0) to use the 'configured' filesystem
    101      * (hadoop-site/hadoop-default.xml).
    102      * @param port The port on which the server is listening.
    103      * @return Returns a handle to the filesystem or NULL on error.
    104      */
    105     hdfsFS hdfsConnect(const char* host, tPort port);
    106 
    107 
    108     /** 
    109      * hdfsDisconnect - Disconnect from the hdfs file system.
    110      * Disconnect from hdfs.
    111      * @param fs The configured filesystem handle.
    112      * @return Returns 0 on success, -1 on error.  
    113      */
    114     int hdfsDisconnect(hdfsFS fs);
    115         
    116 
    117     /** 
    118      * hdfsOpenFile - Open a hdfs file in given mode.
    119      * @param fs The configured filesystem handle.
    120      * @param path The full path to the file.
    121      * @param flags Either O_RDONLY or O_WRONLY, for read-only or write-only.
    122      * @param bufferSize Size of buffer for read/write - pass 0 if you want
    123      * to use the default configured values.
    124      * @param replication Block replication - pass 0 if you want to use
    125      * the default configured values.
    126      * @param blocksize Size of block - pass 0 if you want to use the
    127      * default configured values.
    128      * @return Returns the handle to the open file or NULL on error.
    129      */
    130     hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags,
    131                           int bufferSize, short replication, tSize blocksize);
    132 
    133 
    134     /** 
    135      * hdfsCloseFile - Close an open file. 
    136      * @param fs The configured filesystem handle.
    137      * @param file The file handle.
    138      * @return Returns 0 on success, -1 on error.  
    139      */
    140     int hdfsCloseFile(hdfsFS fs, hdfsFile file);
    141 
    142 
    143     /** 
    144      * hdfsExists - Checks if a given path exists on the filesystem
    145      * @param fs The configured filesystem handle.
    146      * @param path The path to look for
    147      * @return Returns 0 on success, -1 on error.  
    148      */
    149     int hdfsExists(hdfsFS fs, const char *path);
    150 
    151 
    152     /** 
    153      * hdfsSeek - Seek to given offset in file. 
    154      * This works only for files opened in read-only mode. 
    155      * @param fs The configured filesystem handle.
    156      * @param file The file handle.
    157      * @param desiredPos Offset into the file to seek into.
    158      * @return Returns 0 on success, -1 on error.  
    159      */
    160     int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos); 
    161 
    162 
    163     /** 
    164      * hdfsTell - Get the current offset in the file, in bytes.
    165      * @param fs The configured filesystem handle.
    166      * @param file The file handle.
    167      * @return Current offset, -1 on error.
    168      */
    169     tOffset hdfsTell(hdfsFS fs, hdfsFile file);
    170 
    171 
    172     /** 
    173      * hdfsRead - Read data from an open file.
    174      * @param fs The configured filesystem handle.
    175      * @param file The file handle.
    176      * @param buffer The buffer to copy read bytes into.
    177      * @param length The length of the buffer.
    178      * @return Returns the number of bytes actually read, possibly less
    179      * than length; -1 on error.
    180      */
    181     tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length);
    182 
    183 
    184     /** 
    185      * hdfsPread - Positional read of data from an open file.
    186      * @param fs The configured filesystem handle.
    187      * @param file The file handle.
    188      * @param position Position from which to read
    189      * @param buffer The buffer to copy read bytes into.
    190      * @param length The length of the buffer.
    191      * @return Returns the number of bytes actually read, possibly less than
    192      * length; -1 on error.
    193      */
    194     tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position,
    195                     void* buffer, tSize length);
    196 
    197 
    198     /** 
    199      * hdfsWrite - Write data into an open file.
    200      * @param fs The configured filesystem handle.
    201      * @param file The file handle.
    202      * @param buffer The data.
    203      * @param length The no. of bytes to write. 
    204      * @return Returns the number of bytes written, -1 on error.
    205      */
    206     tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer,
    207                     tSize length);
    208 
    209 
    210     /** 
    211      * hdfsFlush - Flush the data.
    212      * @param fs The configured filesystem handle.
    213      * @param file The file handle.
    214      * @return Returns 0 on success, -1 on error. 
    215      */
    216     int hdfsFlush(hdfsFS fs, hdfsFile file);
    217 
    218 
    219     /**
    220      * hdfsAvailable - Number of bytes that can be read from this
    221      * input stream without blocking.
    222      * @param fs The configured filesystem handle.
    223      * @param file The file handle.
    224      * @return Returns available bytes; -1 on error. 
    225      */
    226     int hdfsAvailable(hdfsFS fs, hdfsFile file);
    227 
    228 
    229     /**
    230      * hdfsCopy - Copy file from one filesystem to another.
    231      * @param srcFS The handle to source filesystem.
    232      * @param src The path of source file. 
    233      * @param dstFS The handle to destination filesystem.
    234      * @param dst The path of destination file. 
    235      * @return Returns 0 on success, -1 on error. 
    236      */
    237     int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
    238 
    239 
    240     /**
    241      * hdfsMove - Move file from one filesystem to another.
    242      * @param srcFS The handle to source filesystem.
    243      * @param src The path of source file. 
    244      * @param dstFS The handle to destination filesystem.
    245      * @param dst The path of destination file. 
    246      * @return Returns 0 on success, -1 on error. 
    247      */
    248     int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
    249 
    250 
    251     /**
    252      * hdfsDelete - Delete file. 
    253      * @param fs The configured filesystem handle.
    254      * @param path The path of the file. 
    255      * @return Returns 0 on success, -1 on error. 
    256      */
    257     int hdfsDelete(hdfsFS fs, const char* path);
    258 
    259 
    260     /**
    261      * hdfsRename - Rename file. 
    262      * @param fs The configured filesystem handle.
    263      * @param oldPath The path of the source file. 
    264      * @param newPath The path of the destination file. 
    265      * @return Returns 0 on success, -1 on error. 
    266      */
    267     int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath);
    268 
    269 
    270     /** 
    271      * hdfsGetWorkingDirectory - Get the current working directory for
    272      * the given filesystem.
    273      * @param fs The configured filesystem handle.
    274      * @param buffer The user-buffer to copy path of cwd into. 
    275      * @param bufferSize The length of user-buffer.
    276      * @return Returns buffer, NULL on error.
    277      */
    278     char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize);
    279 
    280 
    281     /** 
    282      * hdfsSetWorkingDirectory - Set the working directory. All relative
    283      * paths will be resolved relative to it.
    284      * @param fs The configured filesystem handle.
    285      * @param path The path of the new 'cwd'. 
    286      * @return Returns 0 on success, -1 on error. 
    287      */
    288     int hdfsSetWorkingDirectory(hdfsFS fs, const char* path);
    289 
    290 
    291     /** 
    292      * hdfsCreateDirectory - Make the given file and all non-existent
    293      * parents into directories.
    294      * @param fs The configured filesystem handle.
    295      * @param path The path of the directory. 
    296      * @return Returns 0 on success, -1 on error. 
    297      */
    298     int hdfsCreateDirectory(hdfsFS fs, const char* path);
    299 
    300 
    301     /** 
    302      * hdfsSetReplication - Set the replication of the specified
    303      * file to the supplied value
    304      * @param fs The configured filesystem handle.
    305      * @param path The path of the file. 
    306      * @return Returns 0 on success, -1 on error. 
    307      */
    308     int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication);
    309 
    310 
    311     /** 
    312      * hdfsFileInfo - Information about a file/directory.
    313      */
    314     typedef struct  {
    315         tObjectKind mKind;   /* kObjectKindFile or kObjectKindDirectory */
    316         char *mName;         /* the name of the file; the struct's fields are released by hdfsFreeFileInfo */
    317         tTime mLastMod;      /* the last modification time for the file (tTime is time_t) */
    318         tOffset mSize;       /* the size of the file in bytes */
    319         short mReplication;    /* the count of replicas */
    320         tOffset mBlockSize;  /* the block size for the file; NOTE(review): presumably in bytes - confirm */
    321     } hdfsFileInfo;
    322 
    323 
    324     /** 
    325      * hdfsListDirectory - Get list of files/directories for a given
    326      * directory-path. hdfsFreeFileInfo should be called to deallocate memory. 
    327      * @param fs The configured filesystem handle.
    328      * @param path The path of the directory. 
    329      * @param numEntries Set to the number of files/directories in path.
    330      * @return Returns a dynamically-allocated array of hdfsFileInfo
    331      * objects; NULL on error.
    332      */
    333     hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path,
    334                                     int *numEntries);
    335 
    336 
    337     /** 
    338      * hdfsGetPathInfo - Get information about a path as a (dynamically
    339      * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be
    340      * called when the pointer is no longer needed.
    341      * @param fs The configured filesystem handle.
    342      * @param path The path of the file. 
    343      * @return Returns a dynamically-allocated hdfsFileInfo object;
    344      * NULL on error.
    345      */
    346     hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path);
    347 
    348 
    349     /** 
    350      * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields) 
    351      * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo
    352      * objects.
    353      * @param numEntries The size of the array.
    354      */
    355     void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries);
    356 
    357 
    358     /** 
    359      * hdfsGetHosts - Get hostnames where a particular block (determined by
    360      * start & length) of a file is stored. The last element in the array
    361      * is NULL. Due to replication, a single block could be present on
    362      * multiple hosts.
    363      * @param fs The configured filesystem handle.
    364      * @param path The path of the file. 
    365      * @param start The start of the block.
    366      * @param length The length of the block.
    367      * @return Returns a dynamically-allocated 2-d array of blocks-hosts;
    368      * NULL on error.
    369      */
    370     char*** hdfsGetHosts(hdfsFS fs, const char* path, 
    371             tOffset start, tOffset length);
    372 
    373 
    374     /** 
    375      * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts
    376      * @param blockHosts The dynamically-allocated 2-d array of
    377      * blocks-hosts returned by hdfsGetHosts; its last element
    378      * is NULL.
    379      */
    380     void hdfsFreeHosts(char ***blockHosts);
    381 
    382 
    383     /** 
    384      * hdfsGetDefaultBlockSize - Get the optimum blocksize.
    385      * @param fs The configured filesystem handle.
    386      * @return Returns the blocksize; -1 on error. 
    387      */
    388     tOffset hdfsGetDefaultBlockSize(hdfsFS fs);
    389 
    390 
    391     /** 
    392      * hdfsGetCapacity - Return the raw capacity of the filesystem.  
    393      * @param fs The configured filesystem handle.
    394      * @return Returns the raw-capacity; -1 on error. 
    395      */
    396     tOffset hdfsGetCapacity(hdfsFS fs);
    397 
    398 
    399     /** 
    400      * hdfsGetUsed - Return the total raw size of all files in the filesystem.
    401      * @param fs The configured filesystem handle.
    402      * @return Returns the total-size; -1 on error. 
    403      */
    404     tOffset hdfsGetUsed(hdfsFS fs);
    405     
    406 #ifdef __cplusplus
    407 }
    408 #endif
    409 
    410 #endif /*LIBHDFS_HDFS_H*/
  • 相关阅读:
    【测试平台学习1】 vue使用与启动
    【Devops】 发布一个Python项目(Flask服务后端)到K8S环境
    Spring5源码分析(024)——IoC篇之bean加载:parentBeanFactory和依赖处理
    Spring5源码分析(023)——IoC篇之bean加载:从缓存中获取单例 bean
    Java基础(018):Class.forName和ClassLoader的区别
    Spring5源码分析(022)——IoC篇之bean加载:FactoryBean的用法
    Spring5源码分析(021)——IoC篇之bean加载
    Java基础(017):反射初探
    Java基础(015):泛型
    Java基础(001):关于 short i = 1; i += 1;
  • 原文地址:https://www.cnblogs.com/caoyingjie/p/3932941.html
Copyright © 2011-2022 走看看