swish-efiles.1.3.2.tar.gz

  • 管理员
    了解作者
  • Perl
    开发工具
  • 114KB
    文件大小
  • gz
    文件格式
  • 0
    收藏次数
  • 1 积分
    下载积分
  • 2000
    下载次数
  • 2001-02-06 20:41
    上传日期
用C语言写的搜索引擎,包含多种建立索引的方式
swish-efiles.1.3.2.tar.gz
  • src
  • win32
  • dirent.c
    5.1KB
  • dirent.h
    1.5KB
  • regex.h
    18.3KB
  • regex.c
    158KB
  • docprop.h
    1KB
  • hash.c
    5.1KB
  • test.html
    358B
  • swishspider
    1.9KB
  • Makefile
    1.2KB
  • index.swish
    2.9KB
  • index.h
    2.4KB
  • methods.c
    501B
  • index.c
    29KB
  • mem.c
    1.4KB
  • fs.c
    10KB
  • check.h
    1.1KB
  • stemmer.h
    176B
  • file.c
    11.8KB
  • hash.h
    1.4KB
  • swish.c
    16KB
  • httpserver.c
    9.8KB
  • config.h
    9.1KB
  • http.h
    722B
  • docprop.c
    7.7KB
  • string.c
    13.8KB
  • error.h
    1KB
  • user.config
    7.2KB
  • check.c
    3.1KB
  • file.h
    1.8KB
  • merge.h
    4KB
  • mem.h
    1.1KB
  • http.c
    12.2KB
  • list.h
    1KB
  • httpserver.h
    635B
  • list.c
    1.3KB
  • swish.h
    14.1KB
  • search.c
    27.8KB
  • error.c
    1KB
  • stemmer.c
    17.9KB
  • search.h
    2.6KB
  • string.h
    1.9KB
  • merge.c
    23.1KB
  • README-SWISH-E
    4.1KB
内容介绍
/* Extended regular expression matching and search library, version 0.12. (Implements POSIX draft P10003.2/D11.2, except for internationalization features.) Copyright (C) 1993 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* AIX requires this to be the first thing in the file. */ #if defined (_AIX) && !defined (REGEX_MALLOC) #pragma alloca #endif #define _GNU_SOURCE #ifdef _WIN32 #define HAVE_STRING_H 1 /* Win32 */ #define REGEX_MALLOC 1 /* Win32 */ #endif /* We need this for `regex.h', and perhaps for the Emacs include files. */ #include <sys/types.h> #ifdef HAVE_CONFIG_H #include "config.h" #endif /* The `emacs' switch turns on certain matching commands that make sense only in Emacs. */ #ifdef emacs #include "lisp.h" #include "buffer.h" #include "syntax.h" /* Emacs uses `NULL' as a predicate. */ #undef NULL #else /* not emacs */ /* We used to test for `BSTRING' here, but only GCC and Emacs define `BSTRING', as far as I know, and neither of them use this code. 
*/ #if HAVE_STRING_H || STDC_HEADERS #include <string.h> #ifndef bcmp #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) #endif #ifndef bcopy #define bcopy(s, d, n) memcpy ((d), (s), (n)) #endif #ifndef bzero #define bzero(s, n) memset ((s), 0, (n)) #endif #else #include <strings.h> #endif #ifdef STDC_HEADERS #include <stdlib.h> #else char *malloc (); char *realloc (); #endif /* Define the syntax stuff for \<, \>, etc. */ /* This must be nonzero for the wordchar and notwordchar pattern commands in re_match_2. */ #ifndef Sword #define Sword 1 #endif #ifdef SYNTAX_TABLE extern char *re_syntax_table; #else /* not SYNTAX_TABLE */ /* How many characters in the character set. */ #define CHAR_SET_SIZE 256 static char re_syntax_table[CHAR_SET_SIZE]; static void init_syntax_once () { register int c; static int done = 0; if (done) return; bzero (re_syntax_table, sizeof re_syntax_table); for (c = 'a'; c <= 'z'; c++) re_syntax_table[c] = Sword; for (c = 'A'; c <= 'Z'; c++) re_syntax_table[c] = Sword; for (c = '0'; c <= '9'; c++) re_syntax_table[c] = Sword; re_syntax_table['_'] = Sword; done = 1; } #endif /* not SYNTAX_TABLE */ #define SYNTAX(c) re_syntax_table[c] #endif /* not emacs */ /* Get the interface, including the syntax bits. */ #include "regex.h" /* isalpha etc. are used for the character classes. 
*/ #include <ctype.h> #ifndef isascii #define isascii(c) 1 #endif #ifdef isblank #define ISBLANK(c) (isascii (c) && isblank (c)) #else #define ISBLANK(c) ((c) == ' ' || (c) == '\t') #endif #ifdef isgraph #define ISGRAPH(c) (isascii (c) && isgraph (c)) #else #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) #endif #define ISPRINT(c) (isascii (c) && isprint (c)) #define ISDIGIT(c) (isascii (c) && isdigit (c)) #define ISALNUM(c) (isascii (c) && isalnum (c)) #define ISALPHA(c) (isascii (c) && isalpha (c)) #define ISCNTRL(c) (isascii (c) && iscntrl (c)) #define ISLOWER(c) (isascii (c) && islower (c)) #define ISPUNCT(c) (isascii (c) && ispunct (c)) #define ISSPACE(c) (isascii (c) && isspace (c)) #define ISUPPER(c) (isascii (c) && isupper (c)) #define ISXDIGIT(c) (isascii (c) && isxdigit (c)) #ifndef NULL #define NULL 0 #endif /* We remove any previous definition of `SIGN_EXTEND_CHAR', since ours (we hope) works properly with all combinations of machines, compilers, `char' and `unsigned char' argument types. (Per Bothner suggested the basic approach.) */ #undef SIGN_EXTEND_CHAR #if __STDC__ #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) #else /* not __STDC__ */ /* As in Harbison and Steele. */ #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) #endif /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we use `alloca' instead of `malloc'. This is because using malloc in re_search* or re_match* could cause memory leaks when C-g is used in Emacs; also, malloc is slower and causes storage fragmentation. On the other hand, malloc is more portable, and easier to debug. Because we sometimes use alloca, some routines have to be macros, not functions -- `alloca'-allocated space disappears at the end of the function it is called in. */ #ifdef REGEX_MALLOC #define REGEX_ALLOCATE malloc #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) #else /* not REGEX_MALLOC */ /* Emacs already defines alloca, sometimes. 
*/ #ifndef alloca /* Make alloca work the best possible way. */ #ifdef __GNUC__ #define alloca __builtin_alloca #else /* not __GNUC__ */ #if HAVE_ALLOCA_H #include <alloca.h> #else /* not __GNUC__ or HAVE_ALLOCA_H */ #ifndef _AIX /* Already did AIX, up at the top. */ char *alloca (); #endif /* not _AIX */ #endif /* not HAVE_ALLOCA_H */ #endif /* not __GNUC__ */ #endif /* not alloca */ #define REGEX_ALLOCATE alloca /* Assumes a `char *destination' variable. */ #define REGEX_REALLOCATE(source, osize, nsize) \ (destination = (char *) alloca (nsize), \ bcopy (source, destination, osize), \ destination) #endif /* not REGEX_MALLOC */ /* True if `size1' is non-NULL and PTR is pointing anywhere inside `string1' or just past its end. This works if PTR is NULL, which is a good thing. */ #define FIRST_STRING_P(ptr) \ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) /* (Re)Allocate N items of type T using malloc, or fail. */ #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) #define BYTEWIDTH 8 /* In bits. */ #define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b)) typedef char boolean; #define false 0 #define true 1 /* These are the command codes that appear in compiled regular expressions. Some opcodes are followed by argument bytes. A command code can specify any interpretation whatsoever for its arguments. Zero bytes may appear in the compiled regular expression. The value of `exactn' is needed in search.c (search_buffer) in Emacs. So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of `exactn' we use here must also be 1. */ typedef enum { no_op = 0, /* Followed by one byte giving n, then by n literal bytes. */ exactn = 1, /* Matches any (more or less) character. 
*/ anychar, /* Matches any one char belonging to specified set. First following byte is number of bitmap bytes. Then come bytes for a bitmap saying which chars are in. Bits in each byte are ordered low-bit-first. A character is in the set if its bit is 1. A character too large to have a bit in the map is automatically not in the set. */ charset, /* Same parameters as charset, but match any character that is not one of those specified. */ charset_not, /* Start remembering the text that is matched, for storing in a register. Followed by one byte with the register number, in the range 0 to one less than the pattern buffer's re_nsub field. Then followed by one byte with the number of groups inne
评论
    相关推荐
    • 调用搜索引擎
      调用百度的搜索引擎,源码亲测可用!
    • 全能搜索引擎
      一款非常不错的全能搜索引擎,内含百度,搜狗,谷歌等等所有知名浏览器,可以搜索音乐,新闻,网站等等。可以将它嵌入自己的网站,会是一个非常不错的功能,给自己的网站增添色彩。
    • ps搜索引擎
      PhpSou搜索引擎:欢迎使用PHPSou开源搜索引擎,系统完全开源,适合做二次开发。
    • 图书搜索引擎
      图书搜索引擎 作为一名狂热的读者,我想搜索要阅读的新书,因此可以保留要购买的书单 给书搜索引擎一个礼物当我加载搜索引擎时,然后会显示一个菜单,其中包含“搜索书”和“登录/注册”选项以及一个用于搜索书的...
    • sphider搜索引擎
      sphider搜索引擎,一个网页资源的搜索,帮助大家及时准确构建自己的搜索环境
    • Search搜索引擎
      简单、快捷的搜索引擎,方便易懂,使每个不懂计算机的人也可以拥有自己的搜索
    • 搜索引擎
      搜索引擎,我是大三的学生,是.NET工作室的,基于Lucene.NET做的搜索引擎,很好用哦,得过奖哦!
    • 搜索引擎优化
      搜索引擎优化,运行后可以优化搜索引擎对比优化前后的效果。
    • 搜索引擎资料
      搜索引擎资料搜索引擎资料搜索引擎资料搜索引擎资料
    • jQuery仿搜索引擎搜索框下拉提示代码
      jQuery仿搜索引擎搜索框下拉提示关键词选择代码,多个搜索引擎切换,点击搜索框下拉弹出热门关键词列表,点击进行快捷搜索功能。