Java 插入数据到Elasticsearch中进行各种类型文档的内容检索

admin2024-05-15  0

源码下载:链接:https://pan.baidu.com/s/1D3yszkTzjwQz0vFRozQl2g?pwd=z6kb

提取码:z6kb

Java 插入数据到Elasticsearch中进行各种类型文档的内容检索,在这里插入图片描述,第1张

实现思路

1.搭建一个新的springboot项目,不会的请看我这篇博客:springboot项目搭建

2.添加maven依赖

		<!-- Spring Boot web starter; no version is given here, so it is presumably managed by the Spring Boot parent POM (confirm in your project) -->
		<dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- Apache Tika core: provides the Tika and MediaType classes used by EsController to detect a file's MIME type -->
        <dependency>
            <groupId>org.apache.tika</groupId>
            <artifactId>tika-core</artifactId>
            <version>1.27</version>
        </dependency>
        <!-- Elasticsearch core classes (IndexRequest, TimeValue, XContentType) used by EsController -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>7.10.0</version>
        </dependency>
        <!-- Elasticsearch high-level REST client (RestHighLevelClient) used to send the index request; same version as the core artifact above -->
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <version>7.10.0</version>
        </dependency>
        <!-- Apache Commons Lang; not referenced by the sample classes shown in this article -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.6</version>
        </dependency>
        <!-- fastjson: JSON.toJSONString serializes the FileBean into the request body -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <!-- Lombok: supplies the @Data annotation on FileBean -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.20</version>
        </dependency>
        <!-- Apache PDFBox; not referenced by the sample classes shown in this article -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.25</version>
        </dependency>

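3.创建一个类,复制代码,执行main方法(运行前需确保Elasticsearch已安装ingest-attachment插件,并已创建名为attachment的pipeline,代码中通过setPipeline("attachment")使用它)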

package com.demo.controller;

import com.demo.bean.FileBean;
import org.apache.http.HttpHost;
import org.apache.tika.Tika;
import org.apache.tika.mime.MediaType;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import com.alibaba.fastjson.JSON;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;

public class EsController {
    private static EsController FileToBase64;

    // 使用tika库自动获取文件类型
    public static String getFileTypeByDefaultTika(String filePathUrl) throws IOException, URISyntaxException {
        // 从 URL 创建一个 File 对象
        File file = new File(new URL("file:///" + filePathUrl).toURI());
        // 使用 Tika 来检测文件的 MIME 类型
        Tika tika = new Tika();
        MediaType mediaType = MediaType.parse(tika.detect(file));

        // 从 MIME 类型中提取文件的基本类型(如 pdf、image、video 等)
        String fileType = mediaType.getSubtype();
        return fileType;
    }

    // 转换文件为base64
    public static String fileToBase64(String filePath) throws IOException {
        byte[] fileContent = Files.readAllBytes(Paths.get(filePath));
        return Base64.getEncoder().encodeToString(fileContent);
    }


    // 根据文件类型判断排除音视频类文件
    public static String fileFilterate(String pathUrl) {
        try {
            String fileType = getFileTypeByDefaultTika(pathUrl);

            if (!fileType.contains("video")
                    && !fileType.contains("image")
                    && !"application/zip".equals(fileType)) {
                return fileToBase64(pathUrl);
            }
            return "";
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        } catch (URISyntaxException e) {
            e.printStackTrace();
            return "";
        }
    }

    public static void main(String[] args) throws IOException {

        // 初始化RestHighLevelClient,localhost就是ES的ip地址,端口号为9200
        RestClientBuilder builder = RestClient.builder(new HttpHost("localhost", 9200, "http"));
        RestHighLevelClient client = new RestHighLevelClient(builder);

        //文件转成base,存入ES中
        String path = "C:\Users\83677\Desktop\测试4.docx";
        String file_base64 = FileToBase64.fileFilterate(path);
        //拿到base64,存入ES中
        FileBean filebean = new FileBean();
        filebean.setFile_id("1");
        filebean.setFile_name("测试4.docx");
        filebean.setFile_url("http://文件存储地址:8080/xxx/docs/raw/master/性能分析与内存问题排查思考.pdf");
        filebean.setFile_type("docx");
        filebean.setContent(file_base64);
        filebean.setFile_size("33");
        filebean.setFile_dir_name("yryy");
        filebean.setFile_suffix(".docx");
        filebean.setGroup_file_id("1234653");

        //把实体对象转为字符串
        String body = JSON.toJSONString(filebean);
        //file_data 是索引名称,这里就是插入数据到ES的核心部分
        IndexRequest indexRequest = new IndexRequest().index("file_data")
                .source(body, XContentType.JSON)//请求参数,类型为JSON
                .setPipeline("attachment") //上传时使用attachment pipline进行提取文件
                .timeout(TimeValue.timeValueMinutes(10));

        client.index(indexRequest, RequestOptions.DEFAULT);

        // 关闭客户端
        client.close();
    }
}

4.实体类对象,我的示例

package com.demo.bean;

import lombok.Data;

/**
 * Document model that is serialized to JSON and indexed into Elasticsearch.
 *
 * <p>Lombok's {@code @Data} supplies the accessors. The snake_case field names
 * are kept as written; they appear to double as the JSON property names
 * (verify against the serializer configuration).
 */
@Data
public class FileBean {
    // Identifier of the file.
    private String file_id;
    // Display name of the file, including its extension in the sample ("测试4.docx").
    private String file_name;
    // URL where the original file is stored.
    private String file_url;
    // File type label; the sample sets it to the extension without the dot ("docx").
    private String file_type;
    // Base64-encoded file content; the sample sets it from EsController.fileFilterate.
    private String content;
    // Identifier of the group the file belongs to (presumably; only an opaque id is set in the sample).
    private String group_file_id;
    // File extension including the leading dot (".docx" in the sample).
    private String file_suffix;
    // File size as text; the unit is not shown in the sample (TODO confirm).
    private String file_size;
    // Name of the directory that holds the file.
    private String file_dir_name;

}

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明原文出处。如若内容造成侵权/违法违规/事实不符,请联系SD编程学习网:675289112@qq.com进行投诉反馈,一经查实,立即删除!