当前位置: 首页 > news > 正文

Day 66-68 主动学习之ALEC

代码:

package dl;import java.io.FileReader;
import java.util.*;
import weka.core.Instances;/*** Active learning through density clustering.*/
public class Alec {/*** The whole dataset.*/Instances dataset;/*** The maximal number of queries that can be provided.*/int maxNumQuery;/*** The actual number of queries.*/int numQuery;/*** The radius, also dc in the paper. It is employed for density computation.*/double radius;/*** The densities of instances, also rho in the paper.*/double[] densities;/*** distanceToMaster*/double[] distanceToMaster;/*** Sorted indices, where the first element indicates the instance with the* biggest density.*/int[] descendantDensities;/*** Priority*/double[] priority;/*** The maximal distance between any pair of points.*/double maximalDistance;/*** Who is my master?*/int[] masters;/*** Predicted labels.*/int[] predictedLabels;/*** Instance status. 0 for unprocessed, 1 for queried, 2 for classified.*/int[] instanceStatusArray;/*** The descendant indices to show the representativeness of instances in a* descendant order.*/int[] descendantRepresentatives;/*** Indicate the cluster of each instance. It is only used in* clusterInTwo(int[]);*/int[] clusterIndices;/*** Blocks with size no more than this threshold should not be split further.*/int smallBlockThreshold = 3;/************************************* The constructor.** @param paraFilename*            The data filename.***********************************/public Alec(String paraFilename) {try {FileReader tempReader = new FileReader(paraFilename);dataset = new Instances(tempReader);dataset.setClassIndex(dataset.numAttributes() - 1);tempReader.close();} catch (Exception ee) {System.out.println(ee);System.exit(0);} // Of frycomputeMaximalDistance();clusterIndices = new int[dataset.numInstances()];}// Of the constructor/************************************* Merge sort in descendant order to obtain an index array. The original* array is unchanged. The method should be tested further. <br>* Examples: input [1.2, 2.3, 0.4, 0.5], output [1, 0, 3, 2]. 
<br>* input [3.1, 5.2, 6.3, 2.1, 4.4], output [2, 1, 4, 0, 3].** @param paraArray*            the original array* @return The sorted indices.***********************************/public static int[] mergeSortToIndices(double[] paraArray) {int tempLength = paraArray.length;int[][] resultMatrix = new int[2][tempLength];// For merge sort.// Initializeint tempIndex = 0;for (int i = 0; i < tempLength; i++) {resultMatrix[tempIndex][i] = i;} // Of for i// Mergeint tempCurrentLength = 1;// The indices for current merged groups.int tempFirstStart, tempSecondStart, tempSecondEnd;while (tempCurrentLength < tempLength) {// Divide into a number of groups.// Here the boundary is adaptive to array length not equal to 2^k.for (int i = 0; i < Math.ceil((tempLength + 0.0) / tempCurrentLength / 2); i++) {// Boundaries of the grouptempFirstStart = i * tempCurrentLength * 2;tempSecondStart = tempFirstStart + tempCurrentLength;tempSecondEnd = tempSecondStart + tempCurrentLength - 1;if (tempSecondEnd >= tempLength) {tempSecondEnd = tempLength - 1;} // Of if// Merge this groupint tempFirstIndex = tempFirstStart;int tempSecondIndex = tempSecondStart;int tempCurrentIndex = tempFirstStart;if (tempSecondStart >= tempLength) {for (int j = tempFirstIndex; j < tempLength; j++) {resultMatrix[(tempIndex + 1) % 2][tempCurrentIndex] = resultMatrix[tempIndex% 2][j];tempFirstIndex++;tempCurrentIndex++;} // Of for jbreak;} // Of ifwhile ((tempFirstIndex <= tempSecondStart - 1)&& (tempSecondIndex <= tempSecondEnd)) {if (paraArray[resultMatrix[tempIndex% 2][tempFirstIndex]] >= paraArray[resultMatrix[tempIndex% 2][tempSecondIndex]]) {resultMatrix[(tempIndex + 1) % 2][tempCurrentIndex] = resultMatrix[tempIndex% 2][tempFirstIndex];tempFirstIndex++;} else {resultMatrix[(tempIndex + 1) % 2][tempCurrentIndex] = resultMatrix[tempIndex% 2][tempSecondIndex];tempSecondIndex++;} // Of iftempCurrentIndex++;} // Of while// Remaining partfor (int j = tempFirstIndex; j < tempSecondStart; j++) {resultMatrix[(tempIndex + 
1) % 2][tempCurrentIndex] = resultMatrix[tempIndex% 2][j];tempCurrentIndex++;} // Of for jfor (int j = tempSecondIndex; j <= tempSecondEnd; j++) {resultMatrix[(tempIndex + 1) % 2][tempCurrentIndex] = resultMatrix[tempIndex% 2][j];tempCurrentIndex++;} // Of for j} // Of for itempCurrentLength *= 2;tempIndex++;} // Of whilereturn resultMatrix[tempIndex % 2];}// Of mergeSortToIndices/************************ The Euclidean distance between two instances. Other distance measures* unsupported for simplicity.*** @param paraI*            The index of the first instance.* @param paraJ*            The index of the second instance.* @return The distance.**********************/public double distance(int paraI, int paraJ) {double resultDistance = 0;double tempDifference;for (int i = 0; i < dataset.numAttributes() - 1; i++) {tempDifference = dataset.instance(paraI).value(i) - dataset.instance(paraJ).value(i);resultDistance += tempDifference * tempDifference;} // Of for iresultDistance = Math.sqrt(resultDistance);return resultDistance;}// Of distance/************************************* Compute the maximal distance. 
The result is stored in a member variable.***********************************/public void computeMaximalDistance() {maximalDistance = 0;double tempDistance;for (int i = 0; i < dataset.numInstances(); i++) {for (int j = 0; j < dataset.numInstances(); j++) {tempDistance = distance(i, j);if (maximalDistance < tempDistance) {maximalDistance = tempDistance;} // Of if} // Of for j} // Of for iSystem.out.println("maximalDistance = " + maximalDistance);}// Of computeMaximalDistance/********************* Compute the densities using Gaussian kernel.*******************/public void computeDensitiesGaussian() {System.out.println("radius = " + radius);densities = new double[dataset.numInstances()];double tempDistance;for (int i = 0; i < dataset.numInstances(); i++) {for (int j = 0; j < dataset.numInstances(); j++) {tempDistance = distance(i, j);densities[i] += Math.exp(-tempDistance * tempDistance / radius / radius);} // Of for j} // Of for iSystem.out.println("The densities are " + Arrays.toString(densities) + "\r\n");}// Of computeDensitiesGaussian/************************************* Compute distanceToMaster, the distance to its master.***********************************/public void computeDistanceToMaster() {distanceToMaster = new double[dataset.numInstances()];masters = new int[dataset.numInstances()];descendantDensities = new int[dataset.numInstances()];instanceStatusArray = new int[dataset.numInstances()];descendantDensities = mergeSortToIndices(densities);distanceToMaster[descendantDensities[0]] = maximalDistance;double tempDistance;for (int i = 1; i < dataset.numInstances(); i++) {// Initialize.distanceToMaster[descendantDensities[i]] = maximalDistance;for (int j = 0; j <= i - 1; j++) {tempDistance = distance(descendantDensities[i], descendantDensities[j]);if (distanceToMaster[descendantDensities[i]] > tempDistance) {distanceToMaster[descendantDensities[i]] = tempDistance;masters[descendantDensities[i]] = descendantDensities[j];} // Of if} // Of for j} // Of for 
iSystem.out.println("First compute, masters = " + Arrays.toString(masters));System.out.println("descendantDensities = " + Arrays.toString(descendantDensities));}// Of computeDistanceToMaster/************************************* Compute priority. Element with higher priority is more likely to be* selected as a cluster center. Now it is rho * distanceToMaster. It can* also be rho^alpha * distanceToMaster.***********************************/public void computePriority() {priority = new double[dataset.numInstances()];for (int i = 0; i < dataset.numInstances(); i++) {priority[i] = densities[i] * distanceToMaster[i];} // Of for i}// Of computePriority/**************************** The block of a node should be same as its master. This recursive method* is efficient.** @param paraIndex*            The index of the given node.* @return The cluster index of the current node.**************************/public int coincideWithMaster(int paraIndex) {if (clusterIndices[paraIndex] == -1) {int tempMaster = masters[paraIndex];clusterIndices[paraIndex] = coincideWithMaster(tempMaster);} // Of ifreturn clusterIndices[paraIndex];}// Of coincideWithMaster/**************************** Cluster a block in two. According to the master tree.** @param paraBlock*            The given block.* @return The new blocks where the two most represent instances serve as*         the root.**************************/public int[][] clusterInTwo(int[] paraBlock) {// Reinitialize. 
In fact, only instances in the given block is// considered.Arrays.fill(clusterIndices, -1);// Initialize the cluster number of the two roots.for (int i = 0; i < 2; i++) {clusterIndices[paraBlock[i]] = i;} // Of for ifor (int i = 0; i < paraBlock.length; i++) {if (clusterIndices[paraBlock[i]] != -1) {// Already have a cluster number.continue;} // Of ifclusterIndices[paraBlock[i]] = coincideWithMaster(masters[paraBlock[i]]);} // Of for i// The sub blocks.int[][] resultBlocks = new int[2][];int tempFistBlockCount = 0;for (int i = 0; i < clusterIndices.length; i++) {if (clusterIndices[i] == 0) {tempFistBlockCount++;} // Of if} // Of for iresultBlocks[0] = new int[tempFistBlockCount];resultBlocks[1] = new int[paraBlock.length - tempFistBlockCount];// Copy. You can design shorter code when the number of clusters is// greater than 2.int tempFirstIndex = 0;int tempSecondIndex = 0;for (int i = 0; i < paraBlock.length; i++) {if (clusterIndices[paraBlock[i]] == 0) {resultBlocks[0][tempFirstIndex] = paraBlock[i];tempFirstIndex++;} else {resultBlocks[1][tempSecondIndex] = paraBlock[i];tempSecondIndex++;} // Of if} // Of for iSystem.out.println("Split (" + paraBlock.length + ") instances "+ Arrays.toString(paraBlock) + "\r\nto (" + resultBlocks[0].length + ") instances "+ Arrays.toString(resultBlocks[0]) + "\r\nand (" + resultBlocks[1].length+ ") instances " + Arrays.toString(resultBlocks[1]));return resultBlocks;}// Of clusterInTwo/************************************* Classify instances in the block by simple voting.** @param paraBlock*            The given block.***********************************/public void vote(int[] paraBlock) {int[] tempClassCounts = new int[dataset.numClasses()];for (int i = 0; i < paraBlock.length; i++) {if (instanceStatusArray[paraBlock[i]] == 1) {tempClassCounts[(int) dataset.instance(paraBlock[i]).classValue()]++;} // Of if} // Of for iint tempMaxClass = -1;int tempMaxCount = -1;for (int i = 0; i < tempClassCounts.length; i++) {if (tempMaxCount < 
tempClassCounts[i]) {tempMaxClass = i;tempMaxCount = tempClassCounts[i];} // Of if} // Of for i// Classify unprocessed instances.for (int i = 0; i < paraBlock.length; i++) {if (instanceStatusArray[paraBlock[i]] == 0) {predictedLabels[paraBlock[i]] = tempMaxClass;instanceStatusArray[paraBlock[i]] = 2;} // Of if} // Of for i}// Of vote/************************************* Cluster based active learning. Prepare for** @param paraRatio*            The ratio of the maximal distance as the dc.* @param paraMaxNumQuery*            The maximal number of queries for the whole dataset.* @parm paraSmallBlockThreshold The small block threshold.***********************************/public void clusterBasedActiveLearning(double paraRatio, int paraMaxNumQuery,int paraSmallBlockThreshold) {radius = maximalDistance * paraRatio;smallBlockThreshold = paraSmallBlockThreshold;maxNumQuery = paraMaxNumQuery;predictedLabels = new int[dataset.numInstances()];for (int i = 0; i < dataset.numInstances(); i++) {predictedLabels[i] = -1;} // Of for icomputeDensitiesGaussian();computeDistanceToMaster();computePriority();descendantRepresentatives = mergeSortToIndices(priority);System.out.println("descendantRepresentatives = " + Arrays.toString(descendantRepresentatives));numQuery = 0;clusterBasedActiveLearning(descendantRepresentatives);}// Of clusterBasedActiveLearning/************************************* Cluster based active learning.** @param paraBlock*            The given block. This block must be sorted according to the*            priority in descendant order.***********************************/public void clusterBasedActiveLearning(int[] paraBlock) {System.out.println("clusterBasedActiveLearning for block " + Arrays.toString(paraBlock));// Step 1. 
How many labels are queried for this block.int tempExpectedQueries = (int) Math.sqrt(paraBlock.length);int tempNumQuery = 0;for (int i = 0; i < paraBlock.length; i++) {if (instanceStatusArray[paraBlock[i]] == 1) {tempNumQuery++;} // Of if} // Of for i// Step 2. Vote for small blocks.if ((tempNumQuery >= tempExpectedQueries) && (paraBlock.length <= smallBlockThreshold)) {System.out.println("" + tempNumQuery + " instances are queried, vote for block: \r\n"+ Arrays.toString(paraBlock));vote(paraBlock);return;} // Of if// Step 3. Query enough labels.for (int i = 0; i < tempExpectedQueries; i++) {if (numQuery >= maxNumQuery) {System.out.println("No more queries are provided, numQuery = " + numQuery + ".");vote(paraBlock);return;} // Of ifif (instanceStatusArray[paraBlock[i]] == 0) {instanceStatusArray[paraBlock[i]] = 1;predictedLabels[paraBlock[i]] = (int) dataset.instance(paraBlock[i]).classValue();// System.out.println("Query #" + paraBlock[i] + ", numQuery = "// + numQuery);numQuery++;} // Of if} // Of for i// Step 4. Pure?int tempFirstLabel = predictedLabels[paraBlock[0]];boolean tempPure = true;for (int i = 1; i < tempExpectedQueries; i++) {if (predictedLabels[paraBlock[i]] != tempFirstLabel) {tempPure = false;break;} // Of if} // Of for iif (tempPure) {System.out.println("Classify for pure block: " + Arrays.toString(paraBlock));for (int i = tempExpectedQueries; i < paraBlock.length; i++) {if (instanceStatusArray[paraBlock[i]] == 0) {predictedLabels[paraBlock[i]] = tempFirstLabel;instanceStatusArray[paraBlock[i]] = 2;} // Of if} // Of for ireturn;} // Of if// Step 5. 
Split in two and process them independently.int[][] tempBlocks = clusterInTwo(paraBlock);for (int i = 0; i < 2; i++) {// Attention: recursive invoking here.clusterBasedActiveLearning(tempBlocks[i]);} // Of for i}// Of clusterBasedActiveLearning/********************** Show the statistics information.********************/public String toString() {int[] tempStatusCounts = new int[3];double tempCorrect = 0;for (int i = 0; i < dataset.numInstances(); i++) {tempStatusCounts[instanceStatusArray[i]]++;if (predictedLabels[i] == (int) dataset.instance(i).classValue()) {tempCorrect++;} // Of if} // Of for iString resultString = "(unhandled, queried, classified) = "+ Arrays.toString(tempStatusCounts);resultString += "\r\nCorrect = " + tempCorrect + ", accuracy = "+ (tempCorrect / dataset.numInstances());return resultString;}// Of toString/************************************* The entrance of the program.** @param args:*            Not used now.***********************************/public static void main(String[] args) {long tempStart = System.currentTimeMillis();System.out.println("Starting ALEC.");String arffFilename = "C:\\Users\\86183\\IdeaProjects\\deepLearning\\src\\main\\java\\resources\\iris.arff";//String arffFilename = "C:\\Users\\86183\\IdeaProjects\\deepLearning\\src\\main\\java\\resources\\mushroom.arff";Alec tempAlec = new Alec(arffFilename);// The settings for iristempAlec.clusterBasedActiveLearning(0.15, 30, 3);// The settings for mushroom// tempAlec.clusterBasedActiveLearning(0.1, 800, 3);System.out.println(tempAlec);long tempEnd = System.currentTimeMillis();System.out.println("Runtime: " + (tempEnd - tempStart) + "ms.");}// Of main
}// Of class Alec

结果:

 

http://www.lryc.cn/news/95331.html

相关文章:

  • local-path-provisioner与pvc本地磁盘挂载helm部署
  • Visio/PPT/Matlab输出300dpi以上图片【满足标准投稿要求】
  • 科技UI图标的制作
  • 微信小程序将接口返回的文件流预览导出Excel文件并转发
  • windows 安装 mongodb 数据库
  • 业务不打烊:解决软件系统升级痛点的新方法
  • csdn新星计划vue3+ts+antd赛道——利用inscode搭建vue3(ts)+antd前端模板
  • 通过 CSS 的样式实现语音发送动效类似声音震动的效果
  • 【C#】.Net Framework框架使用JWT
  • SQL高级教程第三章
  • vue 3.0 下载本地pdf文件
  • 平板用的触控笔什么牌子好?ipad第三方电容笔推荐
  • 【Unity2D】相机移动以及设置相机边界
  • 和chatgpt学架构04-路由开发
  • Spring MVC异常处理【单个控制异常处理器、全局异常处理器、自定义异常处理器】
  • 使用3ds Max粒子系统创建飞天箭雨特效场景
  • 【朴素贝叶斯实例】
  • MPAS跨尺度、可变分辨率模式
  • 微信小程序对接SSE接口记录
  • Ngrok 的绝佳替代品,内网穿透神器 Serveo
  • 网络知识点之-路由
  • input 框如何移动光标,设置光标位置?
  • linux内核系统调用学习5:SYSCALL_DEFINE<0-6>
  • maven镜像仓库配置(多镜像自动切换)
  • ChatGPT在智能监控和安防系统中的应用如何?
  • 【Spring Boot Admin】介绍以及使用
  • 本地私有仓库部署、docker--harbor私有仓库部署和管理
  • java根据模板导出word
  • spring学习笔记十四
  • 【springmvc部分功能源码仿写第一步】实现java对目录下所有文件的遍历